diff options
| author | pjotr | 2026-05-24 09:37:14 +0000 |
|---|---|---|
| committer | pjotr | 2026-05-29 08:59:13 +0000 |
| commit | 7069ddc63d1b462fbdb994055dd0006a4f4bc17b (patch) | |
| tree | 8dbab6a9b59c56060757f6d8ce3267ec461c703a | |
| parent | 2554e69813dd9ad06b2014913e15608be9d88e3d (diff) | |
| download | guix-bioinformatics-7069ddc63d1b462fbdb994055dd0006a4f4bc17b.tar.gz | |
Summary of the refactor:
- scripts/lib-pangenome-pack.sh — shared (sourced): resolves versions, builds the pack, writes md5sum.txt and the Markdown inventory. Exposes `pangenome_pack <fmt> <ext> <label>` and `pangenome_write_outputs`.
- scripts/create-singularity-pangenome-tools.sh — 22 lines: sources the lib, calls `pangenome_pack squashfs gz.squashfs singularity`.
- scripts/create-docker-pangenome-tools.sh — 24 lines: sources the lib, calls `pangenome_pack docker tar.gz docker`.

md5sum.txt is deduplicated by filename across runs and sorted by filename, so running both scripts produces a single combined manifest. The inventory .md has identical content for both formats and is named by `<GBHASH>-<DATE>` (no per-image hash, since the contents are the same).
| -rwxr-xr-x | scripts/create-docker-pangenome-tools.sh | 24 | ||||
| -rwxr-xr-x | scripts/create-singularity-pangenome-tools.sh | 139 | ||||
| -rw-r--r-- | scripts/lib-pangenome-pack.sh | 143 |
3 files changed, 175 insertions, 131 deletions
diff --git a/scripts/create-docker-pangenome-tools.sh b/scripts/create-docker-pangenome-tools.sh new file mode 100755 index 0000000..9f65973 --- /dev/null +++ b/scripts/create-docker-pangenome-tools.sh @@ -0,0 +1,24 @@ +#!/bin/sh +# Build a Docker image (tar.gz, loadable via `docker load`) of +# mempang-workshop plus a minimal shell environment, and drop a copy +# in ~/tmp. See lib-pangenome-pack.sh for the naming convention and +# outputs. +# +# Usage: scripts/create-docker-pangenome-tools.sh +# docker load < ~/tmp/<the-tar.gz> +# docker run --rm -it pangenome-tools:<GBHASH> + +. "$(dirname "$0")/lib-pangenome-pack.sh" + +pangenome_pack docker tar.gz docker +pangenome_write_outputs + +echo +echo "Docker image ready:" +ls -lh "$PACK_TARGET" +echo "md5sum: $MD5SUM_FILE" +echo "inventory: $INVENTORY" +echo +echo "Run with:" +echo " docker load < $PACK_TARGET" +echo " docker run --rm -it pangenome-tools:$GB_HASH" diff --git a/scripts/create-singularity-pangenome-tools.sh b/scripts/create-singularity-pangenome-tools.sh index 837f720..93df530 100755 --- a/scripts/create-singularity-pangenome-tools.sh +++ b/scripts/create-singularity-pangenome-tools.sh @@ -1,144 +1,21 @@ #!/bin/sh # Build a Singularity (SquashFS) image of mempang-workshop plus a -# minimal shell environment, and drop a copy in ~/tmp renamed to -# -# pangenome-tools-guix-bioinformatics-<GBHASH>-impg-<IMPG>-\ -# wfmash-<WFMASH>-pggb-<PGGB>-singularity-<YYYYMMDD>-<HASH>.gz.squashfs -# -# where <GBHASH> is the short git rev of the guix-bioinformatics -# checkout, <IMPG>/<WFMASH>/<PGGB> are queried from that channel, -# <YYYYMMDD> is today's date, and <HASH> is the first 8 characters -# of the store-path hash of the image. -# -# Run from anywhere; the script resolves the channel directory from -# its own location. +# minimal shell environment, and drop a copy in ~/tmp. See +# lib-pangenome-pack.sh for the naming convention and outputs. 
# # Usage: scripts/create-singularity-pangenome-tools.sh -set -eu - -SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) -CHANNEL_DIR=$(cd "$SCRIPT_DIR/.." && pwd) -DEST_DIR="$HOME/tmp" - -mkdir -p "$DEST_DIR" - -echo "==> building squashfs pack from $CHANNEL_DIR" -STORE_PATH=$(guix pack -f squashfs --no-offload \ - -L "$CHANNEL_DIR" \ - -S /bin=bin -S /etc/profile=etc/profile \ - mempang-workshop \ - bash coreutils grep sed gzip \ - | tail -n 1) - -if [ ! -e "$STORE_PATH" ]; then - echo "guix pack did not produce a usable store path: $STORE_PATH" >&2 - exit 1 -fi - -BASENAME=$(basename "$STORE_PATH") -HASH=$(echo "$BASENAME" | cut -c1-8) - -# Resolve the exact version each package will contribute to the -# closure. `guix package -A` is regex-on-name and can be ambiguous -# (multiple wfmash variants), so go through `guix build -e ... -n` -# and parse the store basename, which always carries the full version. -resolve_version () { - local expr="$1" name="$2" path - path=$(guix build --no-offload -L "$CHANNEL_DIR" -e "$expr" -n 2>/dev/null \ - | tail -n 1) - [ -n "$path" ] || { echo "could not resolve $name" >&2; exit 1; } - basename "$path" | sed -E "s/^[a-z0-9]+-${name}-//" -} - -IMPG_VER=$(resolve_version '(@ (gn packages pangenome-rust) impg)' impg) -WFMASH_VER=$(resolve_version '(@ (gn packages pangenome) wfmash-0.14-snapshot)' wfmash) -PGGB_VER=$(resolve_version '(@ (gn packages pangenome) pggb)' pggb) - -DATE=$(date +%Y%m%d) -GB_HASH=$(git -C "$CHANNEL_DIR" rev-parse --short=8 HEAD) -TARGET="$DEST_DIR/pangenome-tools-guix-bioinformatics-$GB_HASH-impg-$IMPG_VER-wfmash-$WFMASH_VER-pggb-$PGGB_VER-singularity-$DATE-$HASH.gz.squashfs" - -echo "==> copying $STORE_PATH" -echo " to $TARGET" -cp -L "$STORE_PATH" "$TARGET" -chmod u+w "$TARGET" - -echo "==> writing md5sum.txt" -MD5SUM_FILE="$DEST_DIR/md5sum.txt" -( cd "$DEST_DIR" && md5sum "$(basename "$TARGET")" ) > "$MD5SUM_FILE" - -echo "==> writing tool inventory" 
-INVENTORY="$DEST_DIR/pangenome-tools-guix-bioinformatics-$GB_HASH-$DATE-$HASH.md" -TOOLS_TSV=$(mktemp) -trap 'rm -f "$TOOLS_TSV"' EXIT -guix repl -L "$CHANNEL_DIR" -- /dev/stdin > "$TOOLS_TSV" <<'SCM' -(use-modules (guix packages) (guix utils) (gn packages pangenome) - (ice-9 format) (ice-9 regex)) -;; Only keep packages defined in gn/packages/pangenome.scm or -;; gn/packages/pangenome-rust.scm -- those are the real pangenome -;; tools; everything else (libc, R, python, coreutils, ...) is -;; infrastructure that ends up in the closure but isn't user-facing. -(define pangenome-file-rx - (make-regexp "gn/packages/pangenome(-rust)?\\.scm$")) -(define (pangenome-package? p) - (let ((loc (package-location p))) - (and loc - (regexp-exec pangenome-file-rx (location-file loc))))) -(define seen (make-hash-table)) -(define meta-packages - '("pangenomes" "mempang-workshop-pangenomes" "mempang-workshop")) -(define (emit p) - (when (and (pangenome-package? p) - (not (member (package-name p) meta-packages)) - (not (hash-ref seen (package-name p)))) - (hash-set! seen (package-name p) #t) - (format #t "~a\t~a\t~a~%" - (package-name p) (package-version p) - (or (package-synopsis p) "")))) -(define (expand x) - (let ((p (if (pair? x) (cadr x) x))) - (emit p) - (for-each expand (package-propagated-inputs p)))) -(for-each expand (package-propagated-inputs mempang-workshop)) -SCM -CLEAN_TSV=$(mktemp) -trap 'rm -f "$TOOLS_TSV" "$CLEAN_TSV"' EXIT -grep -P '^[a-z0-9]' "$TOOLS_TSV" > "$CLEAN_TSV" - -# Compute column widths so the rendered Markdown table is also -# readable as raw text. -NAME_W=4 ; VER_W=7 ; DESC_W=11 -while IFS=$(printf '\t') read -r n v d; do - [ ${#n} -gt $NAME_W ] && NAME_W=${#n} - [ ${#v} -gt $VER_W ] && VER_W=${#v} - [ ${#d} -gt $DESC_W ] && DESC_W=${#d} -done < "$CLEAN_TSV" - -dashes () { printf '%*s' "$1" '' | tr ' ' -; } +. 
"$(dirname "$0")/lib-pangenome-pack.sh" -{ - echo "# pangenome-tools $DATE ($HASH)" - echo - echo "Singularity image: \`$(basename "$TARGET")\`" - echo - echo "Built from \`mempang-workshop\` in guix-bioinformatics @ $GB_HASH." - echo - printf "| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n" \ - "Tool" "Version" "Description" - printf "| %s | %s | %s |\n" \ - "$(dashes "$NAME_W")" "$(dashes "$VER_W")" "$(dashes "$DESC_W")" - while IFS=$(printf '\t') read -r n v d; do - printf "| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n" "$n" "$v" "$d" - done < "$CLEAN_TSV" -} > "$INVENTORY" +pangenome_pack squashfs gz.squashfs singularity +pangenome_write_outputs echo echo "Singularity image ready:" -ls -lh "$TARGET" +ls -lh "$PACK_TARGET" echo "md5sum: $MD5SUM_FILE" echo "inventory: $INVENTORY" echo echo "Run with:" -echo " singularity exec $TARGET <command>" -echo " singularity shell $TARGET" +echo " singularity exec $PACK_TARGET <command>" +echo " singularity shell $PACK_TARGET" diff --git a/scripts/lib-pangenome-pack.sh b/scripts/lib-pangenome-pack.sh new file mode 100644 index 0000000..35c50f7 --- /dev/null +++ b/scripts/lib-pangenome-pack.sh @@ -0,0 +1,143 @@ +# Shared helpers for the pangenome-tools image builders. +# +# Sourced from create-singularity-pangenome-tools.sh and +# create-docker-pangenome-tools.sh. Resolves versions, names the +# output, copies the pack into ~/tmp, and writes md5sum.txt and the +# Markdown inventory. +# +# Callers source this file (which assumes "$0" is the front script) +# and then call: +# +# pangenome_pack <guix-pack-format> <file-extension> <name-label> +# pangenome_write_outputs +# +# Variables PACK_TARGET, PACK_LABEL, PACK_HASH are exported back to +# the caller after pangenome_pack runs. + +set -eu + +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +CHANNEL_DIR=$(cd "$SCRIPT_DIR/.." 
&& pwd) +DEST_DIR="$HOME/tmp" +mkdir -p "$DEST_DIR" + +DATE=$(date +%Y%m%d) +GB_HASH=$(git -C "$CHANNEL_DIR" rev-parse --short=8 HEAD) + +# Resolve the exact version each package contributes to the closure. +# `guix package -A` is regex-on-name and can be ambiguous (multiple +# wfmash variants), so go through `guix build -e ... -n` and parse +# the store basename, which always carries the full version. +resolve_version () { + expr="$1" ; name="$2" + path=$(guix build --no-offload -L "$CHANNEL_DIR" -e "$expr" -n 2>/dev/null \ + | tail -n 1) + [ -n "$path" ] || { echo "could not resolve $name" >&2; exit 1; } + basename "$path" | sed -E "s/^[a-z0-9]+-${name}-//" +} + +IMPG_VER=$(resolve_version '(@ (gn packages pangenome-rust) impg)' impg) +WFMASH_VER=$(resolve_version '(@ (gn packages pangenome) wfmash-0.14-snapshot)' wfmash) +PGGB_VER=$(resolve_version '(@ (gn packages pangenome) pggb)' pggb) + +VERSION_STEM="guix-bioinformatics-$GB_HASH-impg-$IMPG_VER-wfmash-$WFMASH_VER-pggb-$PGGB_VER" + +pangenome_pack () { + fmt="$1" ; ext="$2" ; label="$3" + echo "==> building $fmt pack from $CHANNEL_DIR" + extra="" + [ "$fmt" = "docker" ] && extra="--entry-point=/bin/bash --image-tag=pangenome-tools:$GB_HASH" + # shellcheck disable=SC2086 + STORE_PATH=$(guix pack -f "$fmt" --no-offload \ + -L "$CHANNEL_DIR" \ + -S /bin=bin -S /etc/profile=etc/profile \ + $extra \ + mempang-workshop \ + bash coreutils grep sed gzip \ + | tail -n 1) + if [ ! -e "$STORE_PATH" ]; then + echo "guix pack -f $fmt did not produce a usable store path: $STORE_PATH" >&2 + exit 1 + fi + PACK_HASH=$(basename "$STORE_PATH" | cut -c1-8) + PACK_LABEL="$label" + PACK_TARGET="$DEST_DIR/pangenome-tools-$VERSION_STEM-$label-$DATE-$PACK_HASH.$ext" + + echo "==> copying $STORE_PATH" + echo " to $PACK_TARGET" + cp -L "$STORE_PATH" "$PACK_TARGET" + chmod u+w "$PACK_TARGET" +} + +pangenome_write_outputs () { + # Append our line to md5sum.txt, deduping by filename so re-runs + # don't accumulate stale entries. 
+ MD5SUM_FILE="$DEST_DIR/md5sum.txt" + LINE=$(cd "$DEST_DIR" && md5sum "$(basename "$PACK_TARGET")") + TMP=$(mktemp) + trap 'rm -f "$TMP"' EXIT + [ -f "$MD5SUM_FILE" ] && grep -v " $(basename "$PACK_TARGET")\$" \ + "$MD5SUM_FILE" > "$TMP" || true + printf '%s\n' "$LINE" >> "$TMP" + sort -k2 "$TMP" > "$MD5SUM_FILE" + + # Inventory is identical regardless of pack format -- name it + # by channel hash + date only. + INVENTORY="$DEST_DIR/pangenome-tools-guix-bioinformatics-$GB_HASH-$DATE.md" + TOOLS_TSV=$(mktemp) + CLEAN_TSV=$(mktemp) + trap 'rm -f "$TMP" "$TOOLS_TSV" "$CLEAN_TSV"' EXIT + guix repl -L "$CHANNEL_DIR" -- /dev/stdin > "$TOOLS_TSV" <<'SCM' +(use-modules (guix packages) (guix utils) (gn packages pangenome) + (ice-9 format) (ice-9 regex)) +;; Only keep packages defined in gn/packages/pangenome.scm or +;; gn/packages/pangenome-rust.scm -- those are the real pangenome +;; tools; everything else (libc, R, python, coreutils, ...) is +;; infrastructure that ends up in the closure but isn't user-facing. +(define pangenome-file-rx + (make-regexp "gn/packages/pangenome(-rust)?\\.scm$")) +(define (pangenome-package? p) + (let ((loc (package-location p))) + (and loc (regexp-exec pangenome-file-rx (location-file loc))))) +(define seen (make-hash-table)) +(define meta-packages + '("pangenomes" "mempang-workshop-pangenomes" "mempang-workshop")) +(define (emit p) + (when (and (pangenome-package? p) + (not (member (package-name p) meta-packages)) + (not (hash-ref seen (package-name p)))) + (hash-set! seen (package-name p) #t) + (format #t "~a\t~a\t~a~%" + (package-name p) (package-version p) + (or (package-synopsis p) "")))) +(define (expand x) + (let ((p (if (pair? 
x) (cadr x) x))) + (emit p) + (for-each expand (package-propagated-inputs p)))) +(for-each expand (package-propagated-inputs mempang-workshop)) +SCM + grep -P '^[a-z0-9]' "$TOOLS_TSV" > "$CLEAN_TSV" + + NAME_W=4 ; VER_W=7 ; DESC_W=11 + while IFS=$(printf '\t') read -r n v d; do + [ ${#n} -gt $NAME_W ] && NAME_W=${#n} + [ ${#v} -gt $VER_W ] && VER_W=${#v} + [ ${#d} -gt $DESC_W ] && DESC_W=${#d} + done < "$CLEAN_TSV" + + dashes () { printf '%*s' "$1" '' | tr ' ' -; } + + { + echo "# pangenome-tools $DATE (guix-bioinformatics @ $GB_HASH)" + echo + echo "Built from \`mempang-workshop\` in guix-bioinformatics @ $GB_HASH." + echo + printf "| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n" \ + "Tool" "Version" "Description" + printf "| %s | %s | %s |\n" \ + "$(dashes "$NAME_W")" "$(dashes "$VER_W")" "$(dashes "$DESC_W")" + while IFS=$(printf '\t') read -r n v d; do + printf "| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n" "$n" "$v" "$d" + done < "$CLEAN_TSV" + } > "$INVENTORY" +} |
