# Shared helpers for the pangenome-tools image builders.
#
# Sourced from create-singularity-pangenome-tools.sh and
# create-docker-pangenome-tools.sh.  Resolves versions, names the
# output, copies the pack into ~/tmp, and writes md5sum.txt and the
# Markdown inventory.
#
# Callers source this file (which assumes "$0" is the front script)
# and then call:
#
#     pangenome_pack FORMAT EXTENSION LABEL
#     pangenome_write_outputs
#
# Variables PACK_TARGET, PACK_LABEL, PACK_HASH are set as shell
# globals by pangenome_pack, so the sourcing script can read them
# after it returns.

set -eu

# Directory of the front script ("$0"), and the channel checkout one
# level above it.  CDPATH is cleared for the `cd` because, when CDPATH
# is set and matches, `cd` prints the directory it picked on stdout,
# which would end up duplicated inside the captured value.
SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
CHANNEL_DIR=$(CDPATH= cd -- "$SCRIPT_DIR/.." && pwd)

# All artefacts (images, md5sum.txt, inventory) are written here.
DEST_DIR="$HOME/tmp"
mkdir -p -- "$DEST_DIR"

DATE=$(date +%Y%m%d)
# Short (8 hex digit) commit id of the channel checkout; under
# `set -e` a failing git call aborts the sourcing script.
GB_HASH=$(git -C "$CHANNEL_DIR" rev-parse --short=8 HEAD)

# The pack is built for the host architecture (no cross-compile).
# Embed it in the filename so x86_64 / aarch64 / ... images cannot
# be confused.  Optional TUNE env var passes through to
# `guix pack --tune=...` and replaces the arch in the filename slug.
#
# Only the psABI v-levels are accepted as TUNE values:
#
#   x86-64      baseline (any 64-bit Intel/AMD CPU)
#   x86-64-v2   Nehalem / Bulldozer   (SSE4.2 + POPCNT)
#   x86-64-v3   Haswell / Zen 1       (AVX2 + BMI1/2 + FMA)
#   x86-64-v4   Skylake-SP / Zen 4    (AVX-512 F/DQ/CD/BW/VL)
#
# Microarch names like `cascadelake`, `znver3`, `skylake-avx512`
# are NOT accepted: Go (which the closure pulls in via odgi etc.)
# only understands the v-levels and `guix pack --tune=cascadelake`
# fails with "compiler go@... does not support micro-architecture
# cascadelake".  Pick the v-level whose feature set is implied by
# your target microarch (e.g. Cascade Lake/Zen 4 -> v4 because
# both have AVX-512).
ARCH=$(uname -m)
TUNE="${TUNE:-}"

# When TUNE is set, the v-level already implies the architecture
# (x86-64-v4 only makes sense on x86_64); use it on its own to
# avoid a redundant "x86_64-x86-64-v4" slug.
# Filename slug: the TUNE v-level when one was requested, otherwise
# the host architecture reported by `uname -m`.
ARCH_SLUG="${TUNE:-$ARCH}"

# Reject anything that is not one of the four psABI v-levels before
# any expensive guix work starts.
if [ -n "$TUNE" ]; then
  case "$TUNE" in
    x86-64|x86-64-v2|x86-64-v3|x86-64-v4)
      : ;;
    *)
      # NOTE(review): the original wording of this message and the
      # exit status were lost in the copy under review; confirm both.
      cat >&2 <<EOF
unsupported TUNE value: $TUNE
TUNE must be one of: x86-64, x86-64-v2, x86-64-v3, x86-64-v4
EOF
      exit 1
      ;;
  esac
fi

# cpu_compat
#
# Print a one-line description of the CPUs the pack runs on; used for
# the "CPU compatibility" line of the Markdown inventory.
#
# NOTE(review): pangenome_write_outputs calls cpu_compat, but its
# definition was not present in the copy under review.  This version
# is rebuilt from the v-level table in the file header -- confirm the
# wording against the original.
cpu_compat () {
  case "$TUNE" in
    x86-64)
      echo "x86-64 baseline (any 64-bit Intel/AMD CPU)" ;;
    x86-64-v2)
      echo "x86-64-v2 (Nehalem / Bulldozer or newer: SSE4.2 + POPCNT)" ;;
    x86-64-v3)
      echo "x86-64-v3 (Haswell / Zen 1 or newer: AVX2 + BMI1/2 + FMA)" ;;
    x86-64-v4)
      echo "x86-64-v4 (Skylake-SP / Zen 4 or newer: AVX-512 F/DQ/CD/BW/VL)" ;;
    *)
      echo "$ARCH (built without --tune)" ;;
  esac
}

# Ask Guix for the versions of the three headline tools.  The Scheme
# snippet prints them space-separated on one line, which `read` splits
# into the three variables.  Errors from guix are discarded here; an
# empty result is caught by the check below.
read -r IMPG_VER WFMASH_VER PGGB_VER <<EOF
$(guix repl -L "$CHANNEL_DIR" -- /dev/stdin 2>/dev/null <<'SCM'
(use-modules (guix packages)
             (gn packages pangenome)
             (gn packages pangenome-rust))
(format #t "~a ~a ~a~%"
        (package-version impg)
        (package-version wfmash-0.14-snapshot)
        (package-version pggb))
SCM
)
EOF

if [ -z "${IMPG_VER:-}" ] || [ -z "${WFMASH_VER:-}" ] || [ -z "${PGGB_VER:-}" ]; then
  echo "could not resolve package versions" >&2
  exit 1
fi

# Common middle part of every image filename.
VERSION_STEM="$ARCH_SLUG-guix-bioinformatics-$GB_HASH-impg-$IMPG_VER-wfmash-$WFMASH_VER-pggb-$PGGB_VER"

# pangenome_pack FORMAT EXTENSION LABEL
#
# Build the mempang-workshop pack with `guix pack -f FORMAT` and copy
# the result to
#   $DEST_DIR/pangenome-tools-$VERSION_STEM-LABEL-$DATE-<hash>.EXTENSION
#
# Arguments:
#   $1  pack format passed to `guix pack -f`; "docker" additionally
#       gets an entry point and an image tag
#   $2  file extension of the copied image
#   $3  label embedded in the file name
# Globals read:    CHANNEL_DIR DEST_DIR DATE GB_HASH TUNE VERSION_STEM
# Globals written: STORE_PATH PACK_HASH PACK_LABEL PACK_TARGET
# Exits with status 1 when guix pack does not yield an existing path.
pangenome_pack () {
  fmt="${1:?pangenome_pack: missing pack format}"
  ext="${2:?pangenome_pack: missing file extension}"
  label="${3:?pangenome_pack: missing label}"

  echo "==> building $fmt pack from $CHANNEL_DIR${TUNE:+ (tune=$TUNE)}"

  # Collect the guix pack options in the function's own positional
  # parameters, so no unquoted word-splitting is needed.
  set -- -f "$fmt" --no-offload \
         -L "$CHANNEL_DIR" \
         -S /bin=bin -S /etc/profile=etc/profile
  if [ "$fmt" = "docker" ]; then
    set -- "$@" --entry-point=/bin/bash "--image-tag=pangenome-tools:$GB_HASH"
  fi
  if [ -n "$TUNE" ]; then
    set -- "$@" "--tune=$TUNE"
  fi

  # The last line of guix pack's stdout is taken as the store path.
  # `tail` hides guix's exit status, so a failed build is detected by
  # the existence check below instead.
  STORE_PATH=$(guix pack "$@" \
                 mempang-workshop \
                 bash coreutils grep sed gzip \
               | tail -n 1)

  if [ ! -e "$STORE_PATH" ]; then
    echo "guix pack -f $fmt did not produce a usable store path: $STORE_PATH" >&2
    exit 1
  fi

  # The first 8 characters of the store item name keep the file name
  # unique per build.
  PACK_HASH=$(basename "$STORE_PATH" | cut -c1-8)
  PACK_LABEL="$label"
  PACK_TARGET="$DEST_DIR/pangenome-tools-$VERSION_STEM-$label-$DATE-$PACK_HASH.$ext"

  echo "==> copying $STORE_PATH"
  echo "         to $PACK_TARGET"
  cp -L -- "$STORE_PATH" "$PACK_TARGET"
  # Make the copy writable by its owner.
  chmod u+w -- "$PACK_TARGET"
}

# pangenome_write_outputs
#
# Record the image written by pangenome_pack in $DEST_DIR/md5sum.txt
# and (re)generate the Markdown inventory of pangenome tools.
#
# Globals read:    PACK_TARGET DEST_DIR CHANNEL_DIR ARCH_SLUG GB_HASH DATE
# Globals written: MD5SUM_FILE LINE TMP INVENTORY TOOLS_TSV CLEAN_TSV
#                  NAME_W VER_W DESC_W (plus the helper `dashes`)
# Installs an EXIT trap that removes its temporary files; this replaces
# any EXIT trap that was set before.
pangenome_write_outputs () {
  : "${PACK_TARGET:?pangenome_pack must run before pangenome_write_outputs}"

  pack_file=$(basename "$PACK_TARGET")
  tab=$(printf '\t')

  TMP=$(mktemp)
  TOOLS_TSV=$(mktemp)
  CLEAN_TSV=$(mktemp)
  trap 'rm -f "$TMP" "$TOOLS_TSV" "$CLEAN_TSV"' EXIT

  # Append our line to md5sum.txt, deduping by filename so re-runs
  # don't accumulate stale entries.
  MD5SUM_FILE="$DEST_DIR/md5sum.txt"
  LINE=$(cd "$DEST_DIR" && md5sum -- "$pack_file")

  if [ -f "$MD5SUM_FILE" ]; then
    # Drop earlier entries for this file.  The quoted part of the case
    # pattern is matched literally, so the dots in version numbers
    # cannot match other characters as they would in a regex.
    while IFS= read -r entry; do
      case "$entry" in
        *" $pack_file") ;;
        *) printf '%s\n' "$entry" ;;
      esac
    done < "$MD5SUM_FILE" > "$TMP"
  fi
  printf '%s\n' "$LINE" >> "$TMP"
  sort -k2 "$TMP" > "$MD5SUM_FILE"

  # Inventory is identical regardless of pack format -- name it by
  # arch slug, channel hash and date only (no label, no pack hash).
  INVENTORY="$DEST_DIR/pangenome-tools-$ARCH_SLUG-guix-bioinformatics-$GB_HASH-$DATE.md"

  # One "name<TAB>version<TAB>synopsis" line per pangenome tool.
  guix repl -L "$CHANNEL_DIR" -- /dev/stdin > "$TOOLS_TSV" <<'SCM'
(use-modules (guix packages)
             (guix utils)
             (gn packages pangenome)
             (ice-9 format)
             (ice-9 regex))

;; Only keep packages defined in gn/packages/pangenome.scm or
;; gn/packages/pangenome-rust.scm -- those are the real pangenome
;; tools; everything else (libc, R, python, coreutils, ...) is
;; infrastructure that ends up in the closure but isn't user-facing.
(define pangenome-file-rx
  (make-regexp "gn/packages/pangenome(-rust)?\\.scm$"))

(define (pangenome-package? p)
  (let ((loc (package-location p)))
    (and loc (regexp-exec pangenome-file-rx (location-file loc)))))

(define seen (make-hash-table))

(define meta-packages
  '("pangenomes" "mempang-workshop-pangenomes" "mempang-workshop"))

(define (emit p)
  (when (and (pangenome-package? p)
             (not (member (package-name p) meta-packages))
             (not (hash-ref seen (package-name p))))
    (hash-set! seen (package-name p) #t)
    (format #t "~a\t~a\t~a~%"
            (package-name p)
            (package-version p)
            (or (package-synopsis p) ""))))

(define (expand x)
  (let ((p (if (pair? x) (cadr x) x)))
    (emit p)
    (for-each expand (package-propagated-inputs p))))

(for-each expand (package-propagated-inputs mempang-workshop))
SCM

  # Keep only lines that start like a package name; anything else the
  # REPL printed is dropped.  A plain BRE is enough, so no PCRE (-P).
  # grep exits 1 when nothing matches; report that explicitly instead
  # of letting `set -e` end the script without a message.
  if ! grep '^[a-z0-9]' "$TOOLS_TSV" > "$CLEAN_TSV"; then
    echo "no pangenome tools found in the mempang-workshop closure" >&2
    exit 1
  fi

  # Column widths: start from the header widths and grow to the
  # longest cell of each column.
  NAME_W=4 ; VER_W=7 ; DESC_W=11
  while IFS="$tab" read -r n v d; do
    if [ "${#n}" -gt "$NAME_W" ]; then NAME_W=${#n}; fi
    if [ "${#v}" -gt "$VER_W" ];  then VER_W=${#v};  fi
    if [ "${#d}" -gt "$DESC_W" ]; then DESC_W=${#d}; fi
  done < "$CLEAN_TSV"

  # dashes N -- print N '-' characters (Markdown header separator).
  dashes () { printf '%*s' "$1" '' | tr ' ' -; }

  {
    echo "# pangenome-tools $DATE ($ARCH_SLUG, guix-bioinformatics @ $GB_HASH)"
    echo
    echo "Built from \`mempang-workshop\` in guix-bioinformatics @ $GB_HASH for $ARCH_SLUG."
    echo
    echo "**CPU compatibility:** $(cpu_compat)"
    echo
    printf "| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n" \
           "Tool" "Version" "Description"
    printf "| %s | %s | %s |\n" \
           "$(dashes "$NAME_W")" "$(dashes "$VER_W")" "$(dashes "$DESC_W")"
    while IFS="$tab" read -r n v d; do
      printf "| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n" "$n" "$v" "$d"
    done < "$CLEAN_TSV"
  } > "$INVENTORY"
}