#!/bin/sh
# Build a Singularity (SquashFS) image of mempang-workshop plus a
# minimal shell environment, and drop a copy in ~/tmp renamed to
#
#   pangenome-tools-guix-bioinformatics-<gb-hash>-impg-<impg-ver>-\
#     wfmash-<wfmash-ver>-pggb-<pggb-ver>-singularity-<date>-<hash>.gz.squashfs
#
# where <gb-hash> is the short git rev of the guix-bioinformatics
# checkout, <impg-ver>/<wfmash-ver>/<pggb-ver> are queried from that
# channel, <date> is today's date (YYYYMMDD), and <hash> is the first
# 8 characters of the store-path hash of the image.
#
# Besides the image, two files are written to ~/tmp:
#   md5sum.txt   checksum of the copied image (overwritten on each run)
#   pangenome-tools-guix-bioinformatics-<gb-hash>-<date>-<hash>.md
#                Markdown inventory of the packaged tools
#
# Run from anywhere; the script resolves the channel directory from
# its own location.
#
# Usage: scripts/create-singularity-pangenome-tools.sh
#
# Provenance: commit 1ecab04c3d9f675a0a183bfc2bb39463c19538ea by pjotr,
# 2026-05-24, "Create pangenome singularity package"
# (scripts/create-singularity-pangenome-tools.sh, mode 100755).

set -eu

# Print a message on stderr and abort.
die () {
    printf '%s\n' "$*" >&2
    exit 1
}

SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
CHANNEL_DIR=$(cd "$SCRIPT_DIR/.." && pwd)
DEST_DIR="$HOME/tmp"

mkdir -p "$DEST_DIR"

echo "==> building squashfs pack from $CHANNEL_DIR"
# Capture the output first instead of piping straight into `tail`:
# /bin/sh has no portable `pipefail`, so a pipeline would hide a
# failing `guix pack` behind tail's exit status.
PACK_OUTPUT=$(guix pack -f squashfs --no-offload \
    -L "$CHANNEL_DIR" \
    -S /bin=bin -S /etc/profile=etc/profile \
    mempang-workshop \
    bash coreutils grep sed gzip) || die "guix pack failed"
# The store path of the image is the last line guix pack prints.
STORE_PATH=$(printf '%s\n' "$PACK_OUTPUT" | tail -n 1)

if [ ! -e "$STORE_PATH" ]; then
    die "guix pack did not produce a usable store path: $STORE_PATH"
fi

# Store basenames look like <hash>-<name>...; keep the first 8
# characters of the hash for the file names.
BASENAME=$(basename "$STORE_PATH")
HASH=$(printf '%s\n' "$BASENAME" | cut -c1-8)

# Resolve the exact version each package will contribute to the
# closure.  `guix package -A` is regex-on-name and can be ambiguous
# (multiple wfmash variants), so go through `guix build -e ... -n`
# and parse the store basename, which always carries the full version.
#
# Arguments: $1 - Scheme expression evaluating to the package
#            $2 - package name as it appears in the store basename
# Outputs:   the version string on stdout
# Returns:   non-zero (after a message on stderr) if nothing resolved
#
# The body is a subshell, so its variables stay private without the
# non-POSIX `local`, and `exit` only leaves the function.
# NOTE(review): this relies on `guix build -n` printing the store path
# on stdout; stderr is discarded, so a failure only surfaces as
# "could not resolve" -- confirm against the Guix version in use.
resolve_version () (
    scheme_expr=$1
    pkg_name=$2
    store_item=$(guix build --no-offload -L "$CHANNEL_DIR" \
        -e "$scheme_expr" -n 2>/dev/null \
        | tail -n 1)
    [ -n "$store_item" ] || die "could not resolve $pkg_name"
    # Drop the leading "<hash>-<name>-" so only the version remains.
    basename "$store_item" | sed -E "s/^[a-z0-9]+-${pkg_name}-//"
)

IMPG_VER=$(resolve_version '(@ (gn packages pangenome-rust) impg)' impg)
WFMASH_VER=$(resolve_version '(@ (gn packages pangenome) wfmash-0.14-snapshot)' wfmash)
PGGB_VER=$(resolve_version '(@ (gn packages pangenome) pggb)' pggb)

DATE=$(date +%Y%m%d)
GB_HASH=$(git -C "$CHANNEL_DIR" rev-parse --short=8 HEAD)
TARGET="$DEST_DIR/pangenome-tools-guix-bioinformatics-$GB_HASH-impg-$IMPG_VER-wfmash-$WFMASH_VER-pggb-$PGGB_VER-singularity-$DATE-$HASH.gz.squashfs"

echo "==> copying $STORE_PATH"
echo " to $TARGET"
cp -L "$STORE_PATH" "$TARGET"
# Make the copy writable by its owner so it can be replaced or removed.
chmod u+w "$TARGET"

echo "==> writing md5sum.txt"
MD5SUM_FILE="$DEST_DIR/md5sum.txt"
# Run md5sum from inside DEST_DIR so the checksum line carries the bare
# file name rather than an absolute path.
( cd "$DEST_DIR" && md5sum "$(basename "$TARGET")" ) > "$MD5SUM_FILE"

echo "==> writing tool inventory"
INVENTORY="$DEST_DIR/pangenome-tools-guix-bioinformatics-$GB_HASH-$DATE-$HASH.md"

# Both scratch files live in one private directory that is removed on
# any exit path.
WORK_DIR=$(mktemp -d)
trap 'rm -rf -- "$WORK_DIR"' EXIT
TOOLS_TSV="$WORK_DIR/tools.tsv"
CLEAN_TSV="$WORK_DIR/clean.tsv"

# Emit one "name<TAB>version<TAB>synopsis" line per propagated input of
# mempang-workshop, descending into the two meta-packages named below
# and printing each package name only once.
guix repl -L "$CHANNEL_DIR" -- /dev/stdin > "$TOOLS_TSV" <<'SCM'
(use-modules (guix packages) (gn packages pangenome) (ice-9 format))
(define seen (make-hash-table))
;; Print one TSV row for P unless its name was already printed.
(define (emit p)
  (unless (hash-ref seen (package-name p))
    (hash-set! seen (package-name p) #t)
    (format #t "~a\t~a\t~a~%"
            (package-name p) (package-version p)
            (or (package-synopsis p) ""))))
;; X is either a package or a (label package ...) input entry.
(define (expand x)
  (let ((p (if (pair? x) (cadr x) x)))
    (cond
     ((member (package-name p)
              '("mempang-workshop-pangenomes" "pangenomes"))
      (for-each expand (package-propagated-inputs p)))
     (else (emit p)))))
(for-each expand (package-propagated-inputs mempang-workshop))
SCM

# Strip generic Unix utilities, compilers, and the base R/Python
# ecosystems so the inventory only lists pangenome-relevant tools.
EXCLUDE='^(bc|coreutils|gawk|gcc|grep|gzip|parallel|pigz|sed|wget|which|zstd|python|python-.*|r-minimal|r-.*)\t'
# The first grep keeps only lines that start like a package name.
# grep exits 1 when no line survives; report that explicitly instead
# of letting `set -e` abort without any message.
grep -P '^[a-z0-9]' "$TOOLS_TSV" \
    | grep -vP "$EXCLUDE" > "$CLEAN_TSV" \
    || die "tool inventory is empty after filtering (or grep failed)"

# Compute column widths so the rendered Markdown table is also
# readable as raw text.  The starting widths are the lengths of the
# header cells "Tool", "Version" and "Description".
NAME_W=4
VER_W=7
DESC_W=11
TAB=$(printf '\t')
while IFS=$TAB read -r n v d; do
    if [ "${#n}" -gt "$NAME_W" ]; then NAME_W=${#n}; fi
    if [ "${#v}" -gt "$VER_W" ]; then VER_W=${#v}; fi
    if [ "${#d}" -gt "$DESC_W" ]; then DESC_W=${#d}; fi
done < "$CLEAN_TSV"

# Print $1 dash characters (no trailing newline).
dashes () { printf '%*s' "$1" '' | tr ' ' -; }

# One left-aligned, padded table row; shared by header and body rows.
ROW_FMT="| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n"

{
    echo "# pangenome-tools $DATE ($HASH)"
    echo
    echo "Singularity image: \`$(basename "$TARGET")\`"
    echo
    echo "Built from \`mempang-workshop\` in guix-bioinformatics @ $GB_HASH."
    echo
    # shellcheck disable=SC2059 -- the format is built from integers above
    printf "$ROW_FMT" "Tool" "Version" "Description"
    printf "| %s | %s | %s |\n" \
        "$(dashes "$NAME_W")" "$(dashes "$VER_W")" "$(dashes "$DESC_W")"
    while IFS=$TAB read -r n v d; do
        # shellcheck disable=SC2059
        printf "$ROW_FMT" "$n" "$v" "$d"
    done < "$CLEAN_TSV"
} > "$INVENTORY"

echo
echo "Singularity image ready:"
ls -lh "$TARGET"
echo "md5sum: $MD5SUM_FILE"
echo "inventory: $INVENTORY"
echo
echo "Run with:"
echo " singularity exec $TARGET <command>"
echo " singularity shell $TARGET"