about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x scripts/create-singularity-pangenome-tools.sh 136
1 files changed, 136 insertions, 0 deletions
diff --git a/scripts/create-singularity-pangenome-tools.sh b/scripts/create-singularity-pangenome-tools.sh
new file mode 100755
index 0000000..f25fbbf
--- /dev/null
+++ b/scripts/create-singularity-pangenome-tools.sh
@@ -0,0 +1,136 @@
+#!/bin/sh
+# Build a Singularity (SquashFS) image of mempang-workshop plus a
+# minimal shell environment, and drop a copy in ~/tmp renamed to
+#
+#   pangenome-tools-guix-bioinformatics-<GBHASH>-impg-<IMPG>-\
+#     wfmash-<WFMASH>-pggb-<PGGB>-singularity-<YYYYMMDD>-<HASH>.gz.squashfs
+#
+# where <GBHASH> is the short git rev of the guix-bioinformatics
+# checkout, <IMPG>/<WFMASH>/<PGGB> are queried from that channel,
+# <YYYYMMDD> is today's date, and <HASH> is the first 8 characters
+# of the store-path hash of the image.
+#
+# Run from anywhere; the script resolves the channel directory from
+# its own location.
+#
+# Usage: scripts/create-singularity-pangenome-tools.sh
+
+set -eu
+
+SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
+CHANNEL_DIR=$(cd "$SCRIPT_DIR/.." && pwd)
+DEST_DIR="$HOME/tmp"
+
+mkdir -p "$DEST_DIR"
+
+echo "==> building squashfs pack from $CHANNEL_DIR"
+STORE_PATH=$(guix pack -f squashfs --no-offload \
+                       -L "$CHANNEL_DIR" \
+                       -S /bin=bin -S /etc/profile=etc/profile \
+                       mempang-workshop \
+                       bash coreutils grep sed gzip \
+             | tail -n 1)
+
+if [ ! -e "$STORE_PATH" ]; then
+    echo "guix pack did not produce a usable store path: $STORE_PATH" >&2
+    exit 1
+fi
+
+BASENAME=$(basename "$STORE_PATH")
+HASH=$(echo "$BASENAME" | cut -c1-8)
+
+# Resolve the exact version each package will contribute to the
+# closure.  `guix package -A` is regex-on-name and can be ambiguous
+# (multiple wfmash variants), so go through `guix build -e ... -n`
+# and parse the store basename, which always carries the full version.
+resolve_version () {
+    local expr="$1" name="$2" path
+    path=$(guix build --no-offload -L "$CHANNEL_DIR" -e "$expr" -n 2>/dev/null \
+           | tail -n 1)
+    [ -n "$path" ] || { echo "could not resolve $name" >&2; exit 1; }
+    basename "$path" | sed -E "s/^[a-z0-9]+-${name}-//"
+}
+
+# Tool versions, channel revision and date that make up the file name.
+IMPG_VER=$(resolve_version '(@ (gn packages pangenome-rust) impg)' impg)
+WFMASH_VER=$(resolve_version '(@ (gn packages pangenome) wfmash-0.14-snapshot)' wfmash)
+PGGB_VER=$(resolve_version '(@ (gn packages pangenome) pggb)' pggb)
+GB_HASH=$(git -C "$CHANNEL_DIR" rev-parse --short=8 HEAD)
+DATE=$(date +%Y%m%d)
+TARGET="$DEST_DIR/pangenome-tools-guix-bioinformatics-$GB_HASH-impg-$IMPG_VER\
+-wfmash-$WFMASH_VER-pggb-$PGGB_VER-singularity-$DATE-$HASH.gz.squashfs"
+
+printf '==> copying %s\n    to %s\n' "$STORE_PATH" "$TARGET"
+cp -L "$STORE_PATH" "$TARGET"
+chmod u+w "$TARGET"
+
+echo "==> writing md5sum.txt"
+MD5SUM_FILE="$DEST_DIR/md5sum.txt"
+( cd "$DEST_DIR" && md5sum "${TARGET##*/}" ) > "$MD5SUM_FILE"
+
+echo "==> writing tool inventory"
+INVENTORY="$DEST_DIR/pangenome-tools-guix-bioinformatics-$GB_HASH-$DATE-$HASH.md"  # Markdown report, written to $DEST_DIR
+TOOLS_TSV=$(mktemp)  # receives one name<TAB>version<TAB>synopsis line per package
+trap 'rm -f "$TOOLS_TSV"' EXIT  # delete the temp file whenever the script exits
+guix repl -L "$CHANNEL_DIR" -- /dev/stdin > "$TOOLS_TSV" <<'SCM'  # Scheme: emit each propagated input once, expanding the two meta-packages
+(use-modules (guix packages) (gn packages pangenome) (ice-9 format))
+(define seen (make-hash-table))
+(define (emit p)
+  (unless (hash-ref seen (package-name p))
+    (hash-set! seen (package-name p) #t)
+    (format #t "~a\t~a\t~a~%"
+            (package-name p) (package-version p)
+            (or (package-synopsis p) ""))))
+(define (expand x)
+  (let ((p (if (pair? x) (cadr x) x)))
+    (cond
+     ((member (package-name p)
+              '("mempang-workshop-pangenomes" "pangenomes"))
+      (for-each expand (package-propagated-inputs p)))
+     (else (emit p)))))
+(for-each expand (package-propagated-inputs mempang-workshop))
+SCM
+CLEAN_TSV=$(mktemp)
+trap 'rm -f "$TOOLS_TSV" "$CLEAN_TSV"' EXIT
+# Drop generic Unix utilities, compilers and the base R/Python ecosystems.
+# grep exits non-zero on an empty result: say so rather than die silently.
+EXCLUDE='^(bc|coreutils|gawk|gcc|grep|gzip|parallel|pigz|sed|wget|which|zstd|python|python-.*|r-minimal|r-.*)\t'
+grep -P '^[a-z0-9]' "$TOOLS_TSV" | grep -vP "$EXCLUDE" > "$CLEAN_TSV" \
+    || { echo "tool inventory filter produced no rows" >&2; exit 1; }
+
+# Compute column widths so the rendered Markdown table is also
+# readable as raw text.
+NAME_W=4 ; VER_W=7 ; DESC_W=11
+while IFS=$(printf '\t') read -r n v d; do
+    [ ${#n} -gt $NAME_W ] && NAME_W=${#n}
+    [ ${#v} -gt $VER_W ] && VER_W=${#v}
+    [ ${#d} -gt $DESC_W ] && DESC_W=${#d}
+done < "$CLEAN_TSV"
+
+dashes () { printf "%${1}s" '' | tr ' ' '-'; }  # print $1 dash characters
+
+# Render the inventory into $INVENTORY: a short preamble, then a table
+# with one padded row per line of $CLEAN_TSV.
+{
+    printf '# pangenome-tools %s (%s)\n\n' "$DATE" "$HASH"
+    printf 'Singularity image: `%s`\n\n' "${TARGET##*/}"
+    printf 'Built from `mempang-workshop` in guix-bioinformatics @ %s.\n\n' \
+           "$GB_HASH"
+    # Header row, then the dashed separator Markdown needs beneath it.
+    printf "| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n" Tool Version Description
+    printf '| %s | %s | %s |\n' \
+           "$(dashes "$NAME_W")" "$(dashes "$VER_W")" "$(dashes "$DESC_W")"
+    while IFS=$(printf '\t') read -r n v d; do
+        printf "| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n" "$n" "$v" "$d"
+    done < "$CLEAN_TSV"
+} > "$INVENTORY"
+
+# Closing summary: artefact locations and example invocations.
+printf '\n%s\n' "Singularity image ready:"
+ls -lh "$TARGET"
+printf '%s\n' \
+    "md5sum:    $MD5SUM_FILE" \
+    "inventory: $INVENTORY" \
+    "" "Run with:" \
+    "  singularity exec $TARGET <command>" \
+    "  singularity shell $TARGET"