summaryrefslogblamecommitdiff
path: root/scripts/bootstrap.sh
blob: 6948801a8ce1a7a3599b420b644af67f5bf98f29 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
            
 
                                                                







                                                                 









                                                                 



































































































                                                                 

                                                               
                                                                










                                                                
                                                                








                                                                



                                                                




                                                                








                                                                
                


























                                                                 
                




                                                                 
 

             
 







                                                                
 

                                                                
 



                                                                
 
                                      
 



                                                                


          

        
                                            
             
                                                        
 

                                                         


           
 
 
  
               
 






                                                                 







                                                  



               
















                                                                


                                                          
 














                                                               
                                                                       

                                 




                                                                        

                                                                
                 







                                                                
 

                                                                 



                                                                
                                                          





                                                                


                                                                








                                                                












                                                            










                                                                
                                                                




                                                                

 


                                                                
  








                                                                
 
                                                              
 

                                                                  





                                                                


                                                              

                                         
                      
     
                                                                



                                                                



                                                                
         







                                                                
  

 

                                                                

  
                                                        
















                                                                

     
 

                                                                

  





                                                                







                                                               






                                                                
                                









                                                                
                                                                
     

 

                                                                
 
      
 
 

                                                                
 
      
 

                                                        
#!/bin/sh -e

#===============================================================
# Filename : scripts/bootstrap.sh (part of Adélie package repo).
# Purpose  : Bootstraps Adélie from source for any architecture.
# Authors  : Zach van Rijn <me@zv.io>
# License  : MIT
# Revision : 20221206
#===============================================================

#===============================================================
# README
#===============================================================
#
# overview
# --------
#
# Given a basic development environment ("Baseline System") that
# contains standard system utilities, this script bootstraps the
# Adélie Linux distribution for any supported target architecture
# (assuming that musl, gcc, etc. have been ported to it) without
# requiring 'root' privileges. This process is slow* and costly,
# but demonstrates that bootstrapping from source is possible.
#
# Effort has been focused on correctness instead of performance.
# This means, some files can be deleted sooner and some of the
# binaries can run natively on the build machine. We know and do
# appreciate there are different opinions on how to approach the
# bootstrap process. This meets our needs and hopefully offers a
# different perspective.
#
# This process takes up a lot of TIME and SPACE, and this cannot
# be improved by throwing hardware at it. This is primarily due
# to the tradeoff of not requiring root privileges at any point,
# and the decision to emulate a native environment instead of to
# force explicit cross-compilation at each step in the process.
#
# (*) See the "requirements" section for mitigations/discussion.
#
#
# features
# --------
#
#   * One-click bootstrap. Just start the script and walk away.
#
#       $ ./scripts/bootstrap.sh ppc64 /path/to/new/scratch/dir
#
#   * Can be done without root privileges: no 'chroot(8)'.
#
#   * This is the real deal. No "seed" binaries are required to
#     go from start to finish. The final output is what we use
#     to set up our official build servers totally* from source.
#
#   * Can be done on a wide range of platforms, almost certainly
#     those supported by mcmtools (want to contribute???).
#
#   * Minimal dependencies. Nothing fancy. Shell scripts only.
#
#   * Can be adapted to assist with porting Adélie to new target
#     architectures. TODO: write a tool to automatically do it.
#
# (*) We don't count binaries in your starting environment, and
#     recommend https://www.gnu.org/software/mes/ for the brave.
#
#
# requirements
# ------------
#
# TL;DR: You must be able to run the mcmtools bootstrap script.
#
# mcmtools is a hard dependency for our bootstrap process now:
#
#     https://git.zv.io/toolchains/bootstrap
#
# It is a simple analog to Buildroot or Yocto. Those tools could
# be used, too, and would provide the "host rootfs" environment.
#
# Internet access is required, but (as an exercise left to the
# reader) it is possible to pre-download all required sources if
# you provide your own rootfs for the static QEMU build process.
#
# If you are in a position to use native hardware, then you are
# able to get away with only the final stages of bootstrapping.
# To do this, you'd essentially comment out the first stages or
# copy the results of the first stages elsewhere and continue.
#
# Other requirements that you should be aware of, estimated:
#
#   * As many CPU cores as you can throw at it;
#
#   * ~15 GB for each rootfs ("seed" and "host"), so ~30GB;
#
#   * ~ 3 GB for toolchains;
#
#   * ~ 2 GB for the "system/" package repository, when built;
#
#   * Please refer to the README in 'bootstrap', linked above,
#     for more information about performance. Most of this can
#     be gained back if you adapt this script to use 'chroot(8)'
#     instead of 'PRoot', at the expense of requiring privilege,
#     with the correct registration of QEMU with 'binfmt_misc'.
#
#     In brief, there is an approximate factor of 13 slowdown on
#     practical workloads when using QEMU user and 'PRoot' to do
#     dynamic binary translation and emulate a foreign machine.
#
#
# process
# -------
#
# The illustration below outlines the complete bootstrap process
# and roughly corresponds to the script layout/organization. Our
# terminology is not perfectly consistent; please excuse this.
#
# The term "build"  is shorthand for "build-native"   CPU, which
# is the machine on which you are performing the bootstrap.
#
# The term "native" is shorthand for "foreign-native" CPU, which
# is the machine to which you are targeting  the bootstrap, and
# relates to the "host" CPU on which the code will run, but that
# with the help of dynamic binary translation, runs "natively".
#
#     - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#     build              (unstable, everything provided by user)
#
#     +-----------------+       User-provided tools. Dependency
#     | Baseline System |       of 'mcmtools', which will verify
#     +-----------------+       that these tools are available.
#              |
#        +----------+           Script to build pinned versions
#        | mcmtools |           of common system utilities, a
#        +----------+           host-arch host-libc toolchain, a
#              |                host-arch musl-libc toolchain...
#              |
#     - - - - -|- - - - - - - - - - - - - - - - - - - - - - - -
#     mixed    |                (stable versions, unstable libc)
#              |
#       +-------------+         ...and a 'chroot'-able rootfs. A
#       | seed rootfs |         sane, but not clean, environment
#       +-------------+         in which we begin the bootstrap.
#              |
#        +-----------+          Script to build 'PRoot' and its
#        | emulators |          dependencies, as well as static
#        +-----------+          QEMU user binaries. Add to seed.
#              |
#     +-----------------+       Static musl toolchains targeting
#     | musl toolchains |       a given architecture: cross from
#     +-----------------+       the host CPU & foreign "native".
#              |                Output binaries will run via the
#              |                'binfmt_misc' mechanism + QEMU.
#              |
#       +-------------+         Script to build Alpine Package
#       | build tools |         Keeper (APK) and dependencies.
#       +-------------+         All binaries are cross-compiled!
#              |                This step is a sanity check.
#              |
#        +----------+           Script to build pinned versions
#        | mcmtools |           of common system utilities. This
#        +----------+           time, all utilities are target-
#              |                native. Build more toolchains...
#              |
#     - - - - -|- - - - - - - - - - - - - - - - - - - - - - - -
#     host     |                (stable versions,   stable libc)
#              |
#       +-------------+         ...and a 'chroot'-able rootfs. A
#       | host rootfs |         sane, clean, foreign "native"
#       +-------------+         rootfs that requires QEMU, or is
#              |                able to run on native hardware.
#              |
#       +-------------+         Script to build Alpine Package
#       | build tools |         Keeper (APK) and dependencies.
#       +-------------+         These binaries are native built!
#              |                This step is required!
#              |
#       +-------------+         Script to build the Adélie Linux
#       | system repo |         "system/" package repository. It
#       +-------------+         is used to build core packages.
#              |
#      +---------------+        Script to install packages into
#      | image creator |        a clean rootfs. Carryover from
#      +---------------+        the mcmtools process is removed.
#              |
#     - - - - -|- - - - - - - - - - - - - - - - - - - - - - - -
#     verify   |                         (optional verification)
#              |
#             ...               The "host" stage can be repeated
#              |                zero or more times to ensure the
#              |                final image is not contaminated.
#              |
#     - - - - -|- - - - - - - - - - - - - - - - - - - - - - - -
#     output   |                       (Welcome to Adélie Linux)
#              |
#      +---------------+        This is a minimal Adélie rootfs.
#      | adelie rootfs |        Copy it to native hardware, then
#      +---------------+        use it as a builder 'chroot'. It
#                               is used to seed official Adélie
#                               build boxen (e.g. autobuilder).
#
# limitations
# -----------
#
# The builds are not hermetically sealed. That is not the point.
# The build environment should not *need* anything from outside,
# but it is not *prevented* from accessing anything. You should
# run this on a clean, trusted machine.
#
# There is no guarantee of byte-for-byte reproducible builds at
# this time. This is, in part, due to timestamps and tar headers
# but may involve a lack of determinism in parallel builds.
#
# QEMU user emulation may cause subtle incompatibilities with the
# target CPU hardware. For example, CPU feature (mis)detection.
#
# It is not currently possible to safely reuse any of the output
# if the target architecture is changed. You will need to start
# from scratch if you wish to change the target. This is a TODO.
# 

# Absolute directory that contains this script, with symlinks in
# the path resolved by 'readlink -f'. Every expansion is quoted
# so that a checkout path containing whitespace or glob characters
# is passed to 'readlink' and 'dirname' as a single word.
HERE="$(dirname "$(readlink -f "${0}")")";


#---------------------------------------------------------------
# initialization

##
# Haaaalp!
#
usage ()
{
    # Print the command-line synopsis and terminate the script.
    #
    # This is only reached when ARCH is missing or unrecognized,
    # so it is an error path: the text goes to stderr and the
    # exit status is non-zero, matching the BASE validation.
    #
    # The here-document delimiter is quoted so the help text is
    # always emitted literally, with no expansion.
    {
        printf "Usage: %s ARCH BASE\n\n" "${0}";
        cat <<'EOF'
    ARCH    { aarch64, armv7, ppc64, ppc, x86_64, pmmx }

    BASE    an absolute path to bootstrap out of.
            ** MAY CAUSE DATA LOSS IF SET INCORRECTLY! **
EOF
    } >&2;
    exit 1;
}


##
# argv[1]: ARCH
#
# ARCH is the Adélie Linux target. This is the first step in the
# porting process, so e.g. mips64, sparc64, riscv64 will need to
# be added to this table when the time comes to port to them.
#
# ARCH is translated to canonical GCC and QEMU machine types.
#
case "${1}" in
    # Each arm maps one Adélie ARCH name to 'm' (the GCC machine
    # name) and 'q' (the suffix of the QEMU user binary).
    aarch64)
        m=aarch64
        q=aarch64
        ;;
    armv7)
        m=armv7l
        q=arm
        ;;
    ppc)
        m=powerpc
        q=ppc
        ;;
    ppc64)
        m=powerpc64
        q=ppc64
        ;;
    x86_64)
        m=x86_64
        q=x86_64
        ;;
    pmmx)
        m=i586
        q=i386
        ;;
    *)
        # Missing or unsupported ARCH: show the help text.
        usage
        ;;
esac

# Drop ARCH so that BASE becomes the first positional parameter.
shift


##
# argv[2]: BASE
#
# BASE is a semi-permanent scratch directory. It is where all of
# the magic happens, and probably cannot be relocated easily. Be
# careful to not set it incorrectly or to a place you'd regret
# being overwritten, corrupted, or deleted.
#
# Accept BASE only when it is an absolute path (begins with '/').
# Anything else, including a missing argument, is rejected with a
# diagnostic on stderr and a non-zero exit status.
case "${1}" in
    /*) BASE="${1}";
        ;;
    *)  printf "BASE not set, or not an absolute path!\n" >&2;
        exit 1;
        ;;
esac


##
# Internal variables. Do not modify this section directly.
#
# Base URL under which the toolchain repositories are cloned.
CHAINS="https://git.zv.io/toolchains"
printf "CHAINS=%s\n" "${CHAINS}"

# Triplet suffix: we only target musl on Linux.
SYSTEM="-linux-musl"
printf "SYSTEM=%s\n" "${SYSTEM}"

# Machine triplet reported by the build machine's 'cc'
# (host arch, host libc).
NATIVE="$(cc -dumpmachine)"
printf "NATIVE=%s\n" "${NATIVE}"

# Text before the first '-' of NATIVE, plus the musl suffix
# (host arch, musl libc).
BUILDS="${NATIVE%%-*}${SYSTEM}"
printf "BUILDS=%s\n" "${BUILDS}"

# GCC machine name selected from ARCH, plus the musl suffix:
# the ultimate Adélie Linux target.
TARGET="${m}${SYSTEM}"
printf "TARGET=%s\n" "${TARGET}"

# mcmtools location: a non-empty 'MCMTOOLS' from the environment
# wins, otherwise it lives under BASE.
# CAREFUL! MAY CAUSE DATA LOSS!
if test -n "${MCMTOOLS}"; then
    MTOOLS="${MCMTOOLS}"
else
    MTOOLS="${BASE}/mcmtools"
fi
printf "MTOOLS=%s\n" "${MTOOLS}"

##
# Search path handed to commands that run inside the various
# rootfs environments.
#
DEF_PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"


#---------------------------------------------------------------
# mcmtools (seed)

##
# Allow the user to supply an existing mcmtools installation. It
# is not ideal, but since mcmtools cannot be (easily) relocated,
# allow the user to save some CPU cycles at the cost of adding a
# bunch of tools to their existing installation. If 'MCMTOOLS'
# is omitted from the environment, '${BASE}/mcmtools' is used.
# Another case for a custom value is if '/tmp' is mounted weird.
#
if [ ! -d "${MTOOLS}/sys/emus/bin" ]; then # FIXME: no hard code
    cd "${BASE}"

    # Fetch the mcmtools bootstrap scripts once, then pin the
    # working tree to a known commit.
    if [ ! -d bootstrap ]; then
        git clone "${CHAINS}/bootstrap.git"
    fi
    cd bootstrap
    git checkout d148555321a391fd6f23a289fa51c2dfa26f4b10

    ## seed rootfs
    #
    # Builds everything needed to be self-reliant, apart from
    # some build dependencies of QEMU. Skipped entirely when the
    # mcmtools directory already exists.
    #
    if [ ! -d "${MTOOLS}" ]; then
        DEST="${MTOOLS}" ARCH="${BUILDS}" ./bootstrap
    fi

    # Keep a copy of the 'config.mak' used by the mcmtools
    # bootstrap at the top of the tree, so that the "real"
    # toolchains built later use the same configuration.
    if [ ! -f "${MTOOLS}/config.mak" ]; then
        cp "${MTOOLS}/tmp/musl-cross-make/config.mak"          \
            "${MTOOLS}/config.mak"
    fi
    #rm -fr "${MTOOLS}"/tmp; # save 10 GB (FIXME: make safe)

    # is any of this actually needed?
    #
    # Runs in a subshell so the working directory of the main
    # script (the 'bootstrap' checkout) is left unchanged.
    (
        cd "${MTOOLS}/sys"
        for d in dev proc sys; do
            mkdir -p "${d}"
        done
        # Replace 'usr' with a symlink to the rootfs itself.
        rm -fr usr
        ln -s . usr
    )

    ## emulators
    #
    # The dependencies are built with the mcmtools host
    # toolchain rather than forcing musl, in case they cannot be
    # built statically and the host libc differs. Obtaining a
    # working 'PRoot' is the priority.
    #
    # QEMU itself is built inside an Alpine Linux rootfs because
    # Python 3 is still needed to build it. Static QEMU user
    # binaries can be supplied by hand instead, but building
    # them this way is a good 'PRoot' test.
    #
    if [ ! -d "${MTOOLS}/sys/emus/bin" ]; then
        PATH="${MTOOLS}/host/bin:${MTOOLS}/sys/bin"            \
        DEST="${MTOOLS}"                                       \
            ./prootemu
    fi
fi


#---------------------------------------------------------------
# musl toolchains

##
# We have a musl-targeting toolchain that runs on the host, but
# it is linked to the host libc and may not run in this chroot.
#
# We need to build the same toolchain, but static. There are two
# possible directions to go: (1) a cross toolchain that runs at
# full speed on the build machine, at the cost of having to tell
# downstream build scripts how to cross compile, or (2) a native
# toolchain for the foreign (target) architecture that runs slow
# in QEMU and still requires the cross toolchain to build it.
#
# Build both. We will eventually need both toolchains, anyway.
#
if ! test -d "${MTOOLS}/sys/tc/native"; then # FIXME: no hard code
    cd "${BASE}";

    test -d musl-cross-make                                    \
        || git clone "${CHAINS}/musl-cross-make.git";
    cd musl-cross-make;
    git pull; # always use the latest

    ##
    # Ensure consistent 'config.mak' for all toolchain builds.
    #
    cp "${MTOOLS}"/config.mak config.mak;

    ## musl toolchains
    #
    # Build these toolchains statically using the musl toolchain
    # from the seed rootfs so that it is known to work correctly
    # (the original musl toolchain may itself be linked with
    # glibc or be unsafe to use in some contexts).
    #
    # Note: "native" is for the foreign target CPU architecture.
    #
    PATH="${MTOOLS}/musl/bin:${MTOOLS}/sys/bin"                \
        ./scripts/build "${TARGET}"                            \
        ;

    ##
    # Unpack both toolchain tarballs into fresh directories under
    # the seed rootfs. "cross" is unpacked first, so the "native"
    # directory tested above only exists once both are in place.
    #
    # 'mkdir -p' is used because nothing in this script creates
    # the parent 'sys/tc' directory; without '-p' the first run
    # would abort here unless an earlier stage happened to make
    # it. ':?' stops 'rm -fr' from running on a path rooted at
    # '/' should MTOOLS ever be empty.
    #
    for k in cross native; do
        rm -fr "${MTOOLS:?}/sys/tc/${k}";
        mkdir -p "${MTOOLS}/sys/tc/${k}";
        tar -C "${MTOOLS}/sys/tc/${k}"                         \
            --strip-components=1                               \
            -xzf "output/${TARGET}-${k}.tgz"                   \
            ;
    done
fi


#---------------------------------------------------------------
# build tools (cross)

##
# Build 'abuild', its dependencies, and other utilities.
#
# Run 'bootstrap-abuild' (from this script's directory) inside
# the seed rootfs via PRoot. The environment puts the cross
# toolchain under '/tc/cross' first in 'PATH' and points CC, CXX,
# AR, LD and CPP at the '${TARGET}-' prefixed tools. DEST and
# CURL_CA_BUNDLE are presumably consumed by 'bootstrap-abuild'.
#
# The 'proot' command word is quoted like every other use of
# MTOOLS, so a BASE (or MCMTOOLS) containing whitespace is not
# split into several words.
#
# PRoot options (see the PRoot manual): '-S' selects the guest
# rootfs, '-q' names the QEMU user binary used for foreign
# programs, and '-b' makes HERE visible inside the guest.
PROOT_NO_SECCOMP=1                                             \
PATH="/tc/cross/bin:${DEF_PATH}"                               \
SHELL=/bin/sh                                                  \
DEST=/usr/local                                                \
CURL_CA_BUNDLE=/cacert.pem                                     \
CROSS_COMPILE="${TARGET}-"                                     \
CC="${TARGET}-gcc"                                             \
CXX="${TARGET}-g++"                                            \
AR="${TARGET}-ar"                                              \
LD="${TARGET}-ld"                                              \
CPP="${TARGET}-cpp"                                            \
"${MTOOLS}"/sys/emus/bin/proot                                 \
    -S "${MTOOLS}"/sys                                         \
    -q "${MTOOLS}"/sys/emus/bin/qemu-"${q}"                    \
    -b "${HERE}"                                               \
    "${HERE}"/bootstrap-abuild                                 \
    ;


#---------------------------------------------------------------
# mcmtools (host)

##
# Build mcmtools again, this time for the target ("host" rootfs).
#
# NOTE: The 'PATH' order is *really* important. Foreign "native"
# toolchains must be the first ones found; e.g. '/usr/bin/gcc'
# is a symlink to 'ccache' and isn't a functional compiler.
#
# NOTE: 'LD_LIBRARY_PATH' is needed because of nonstandard path
# of toolchain, which otherwise produces these errors:
#
# Error loading shared library libstdc++.so.6:
#     Exec format error (needed by ...)
# Error loading shared library libgcc_s.so.1:
#     Exec format error (needed by ...)
#
# Once 'coreutils' is built, 'uname' will return correctly, then
# future software should(tm) build as if it is built natively.
#
# FIXME: is Linux 3.2.0 really appropriate here?
#
# Run the mcmtools 'bootstrap' script from the checkout under
# BASE inside the seed rootfs via PRoot, with the foreign
# "native" toolchain under '/tc/native' first in 'PATH'. The
# result is written to '${BASE}/mcmtools-${TARGET}' (DEST).
#
# The 'proot' command word is quoted like every other use of
# MTOOLS, so a BASE (or MCMTOOLS) containing whitespace is not
# split into several words.
#
# PRoot options (see the PRoot manual): '-S' selects the guest
# rootfs, '-q' names the QEMU user binary used for foreign
# programs, '-b' makes HERE visible inside the guest, and '-k'
# sets the kernel release string reported to guest programs.
#
# NOTE(review): BASE itself is not bound with '-b'; this
# presumably relies on BASE living under a path that '-S' binds
# by default. Confirm before using a BASE elsewhere.
PROOT_NO_SECCOMP=1                                             \
PATH="/tc/native/bin:${DEF_PATH}"                              \
LD_LIBRARY_PATH=/tc/native/lib                                 \
SHELL=/bin/sh                                                  \
BASE="${BASE}"                                                 \
DEST="${BASE}/mcmtools-${TARGET}"                              \
ARCH="${TARGET}"                                               \
CURL_CA_BUNDLE=/cacert.pem                                     \
"${MTOOLS}"/sys/emus/bin/proot                                 \
    -S "${MTOOLS}"/sys                                         \
    -q "${MTOOLS}"/sys/emus/bin/qemu-"${q}"                    \
    -b "${HERE}"                                               \
    -k "3.2.0"                                                 \
    "${BASE}"/bootstrap/bootstrap                              \
    ;


#---------------------------------------------------------------
# system repository

# TODO


#---------------------------------------------------------------
# image creator

# TODO

# apk --root /foo --arch armv7 --initdb add
# apk --root /foo add adelie-core dash-binsh build-tools