about summary refs log tree commit diff
diff options
context:
space:
mode:
author pjotr 2026-05-24 09:37:14 +0000
committer pjotr 2026-05-29 08:59:13 +0000
commit 7069ddc63d1b462fbdb994055dd0006a4f4bc17b (patch)
tree 8dbab6a9b59c56060757f6d8ce3267ec461c703a
parent 2554e69813dd9ad06b2014913e15608be9d88e3d (diff)
download guix-bioinformatics-7069ddc63d1b462fbdb994055dd0006a4f4bc17b.tar.gz
Summary of the refactor:
  - scripts/lib-pangenome-pack.sh — shared (sourced): resolves versions, builds the pack, writes md5sum.txt and the Markdown inventory. Exposes
   pangenome_pack <fmt> <ext> <label> and pangenome_write_outputs.
  - scripts/create-singularity-pangenome-tools.sh — 21 lines: sources the lib, calls pangenome_pack squashfs gz.squashfs singularity.
  - scripts/create-docker-pangenome-tools.sh — 24 lines: sources the lib, calls pangenome_pack docker tar.gz docker.

  md5sum.txt is deduplicated by filename across runs and sorted by filename, so running both scripts produces a single combined manifest. The
  inventory .md has identical content for both formats and is named by <GBHASH>-<DATE> (no per-image hash, since the contents are the same).
-rwxr-xr-x scripts/create-docker-pangenome-tools.sh 24
-rwxr-xr-x scripts/create-singularity-pangenome-tools.sh 139
-rw-r--r-- scripts/lib-pangenome-pack.sh 143
3 files changed, 175 insertions, 131 deletions
diff --git a/scripts/create-docker-pangenome-tools.sh b/scripts/create-docker-pangenome-tools.sh
new file mode 100755
index 0000000..9f65973
--- /dev/null
+++ b/scripts/create-docker-pangenome-tools.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+# Build a Docker image (tar.gz, loadable via `docker load`) of
+# mempang-workshop plus a minimal shell environment, and drop a copy
+# in ~/tmp.  See lib-pangenome-pack.sh for the naming convention and
+# outputs.
+#
+# Usage: scripts/create-docker-pangenome-tools.sh
+#        docker load < ~/tmp/<the-tar.gz>
+#        docker run --rm -it pangenome-tools:<GBHASH>
+
+. "$(dirname "$0")/lib-pangenome-pack.sh"
+
+pangenome_pack docker tar.gz docker
+pangenome_write_outputs
+
+echo
+echo "Docker image ready:"
+ls -lh "$PACK_TARGET"
+echo "md5sum:    $MD5SUM_FILE"
+echo "inventory: $INVENTORY"
+echo
+echo "Run with:"
+echo "  docker load < $PACK_TARGET"
+echo "  docker run --rm -it pangenome-tools:$GB_HASH"
diff --git a/scripts/create-singularity-pangenome-tools.sh b/scripts/create-singularity-pangenome-tools.sh
index 837f720..93df530 100755
--- a/scripts/create-singularity-pangenome-tools.sh
+++ b/scripts/create-singularity-pangenome-tools.sh
@@ -1,144 +1,21 @@
 #!/bin/sh
 # Build a Singularity (SquashFS) image of mempang-workshop plus a
-# minimal shell environment, and drop a copy in ~/tmp renamed to
-#
-#   pangenome-tools-guix-bioinformatics-<GBHASH>-impg-<IMPG>-\
-#     wfmash-<WFMASH>-pggb-<PGGB>-singularity-<YYYYMMDD>-<HASH>.gz.squashfs
-#
-# where <GBHASH> is the short git rev of the guix-bioinformatics
-# checkout, <IMPG>/<WFMASH>/<PGGB> are queried from that channel,
-# <YYYYMMDD> is today's date, and <HASH> is the first 8 characters
-# of the store-path hash of the image.
-#
-# Run from anywhere; the script resolves the channel directory from
-# its own location.
+# minimal shell environment, and drop a copy in ~/tmp.  See
+# lib-pangenome-pack.sh for the naming convention and outputs.
 #
 # Usage: scripts/create-singularity-pangenome-tools.sh
 
-set -eu
-
-SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
-CHANNEL_DIR=$(cd "$SCRIPT_DIR/.." && pwd)
-DEST_DIR="$HOME/tmp"
-
-mkdir -p "$DEST_DIR"
-
-echo "==> building squashfs pack from $CHANNEL_DIR"
-STORE_PATH=$(guix pack -f squashfs --no-offload \
-                       -L "$CHANNEL_DIR" \
-                       -S /bin=bin -S /etc/profile=etc/profile \
-                       mempang-workshop \
-                       bash coreutils grep sed gzip \
-             | tail -n 1)
-
-if [ ! -e "$STORE_PATH" ]; then
-    echo "guix pack did not produce a usable store path: $STORE_PATH" >&2
-    exit 1
-fi
-
-BASENAME=$(basename "$STORE_PATH")
-HASH=$(echo "$BASENAME" | cut -c1-8)
-
-# Resolve the exact version each package will contribute to the
-# closure.  `guix package -A` is regex-on-name and can be ambiguous
-# (multiple wfmash variants), so go through `guix build -e ... -n`
-# and parse the store basename, which always carries the full version.
-resolve_version () {
-    local expr="$1" name="$2" path
-    path=$(guix build --no-offload -L "$CHANNEL_DIR" -e "$expr" -n 2>/dev/null \
-           | tail -n 1)
-    [ -n "$path" ] || { echo "could not resolve $name" >&2; exit 1; }
-    basename "$path" | sed -E "s/^[a-z0-9]+-${name}-//"
-}
-
-IMPG_VER=$(resolve_version '(@ (gn packages pangenome-rust) impg)' impg)
-WFMASH_VER=$(resolve_version '(@ (gn packages pangenome) wfmash-0.14-snapshot)' wfmash)
-PGGB_VER=$(resolve_version '(@ (gn packages pangenome) pggb)' pggb)
-
-DATE=$(date +%Y%m%d)
-GB_HASH=$(git -C "$CHANNEL_DIR" rev-parse --short=8 HEAD)
-TARGET="$DEST_DIR/pangenome-tools-guix-bioinformatics-$GB_HASH-impg-$IMPG_VER-wfmash-$WFMASH_VER-pggb-$PGGB_VER-singularity-$DATE-$HASH.gz.squashfs"
-
-echo "==> copying $STORE_PATH"
-echo "    to $TARGET"
-cp -L "$STORE_PATH" "$TARGET"
-chmod u+w "$TARGET"
-
-echo "==> writing md5sum.txt"
-MD5SUM_FILE="$DEST_DIR/md5sum.txt"
-( cd "$DEST_DIR" && md5sum "$(basename "$TARGET")" ) > "$MD5SUM_FILE"
-
-echo "==> writing tool inventory"
-INVENTORY="$DEST_DIR/pangenome-tools-guix-bioinformatics-$GB_HASH-$DATE-$HASH.md"
-TOOLS_TSV=$(mktemp)
-trap 'rm -f "$TOOLS_TSV"' EXIT
-guix repl -L "$CHANNEL_DIR" -- /dev/stdin > "$TOOLS_TSV" <<'SCM'
-(use-modules (guix packages) (guix utils) (gn packages pangenome)
-             (ice-9 format) (ice-9 regex))
-;; Only keep packages defined in gn/packages/pangenome.scm or
-;; gn/packages/pangenome-rust.scm -- those are the real pangenome
-;; tools; everything else (libc, R, python, coreutils, ...) is
-;; infrastructure that ends up in the closure but isn't user-facing.
-(define pangenome-file-rx
-  (make-regexp "gn/packages/pangenome(-rust)?\\.scm$"))
-(define (pangenome-package? p)
-  (let ((loc (package-location p)))
-    (and loc
-         (regexp-exec pangenome-file-rx (location-file loc)))))
-(define seen (make-hash-table))
-(define meta-packages
-  '("pangenomes" "mempang-workshop-pangenomes" "mempang-workshop"))
-(define (emit p)
-  (when (and (pangenome-package? p)
-             (not (member (package-name p) meta-packages))
-             (not (hash-ref seen (package-name p))))
-    (hash-set! seen (package-name p) #t)
-    (format #t "~a\t~a\t~a~%"
-            (package-name p) (package-version p)
-            (or (package-synopsis p) ""))))
-(define (expand x)
-  (let ((p (if (pair? x) (cadr x) x)))
-    (emit p)
-    (for-each expand (package-propagated-inputs p))))
-(for-each expand (package-propagated-inputs mempang-workshop))
-SCM
-CLEAN_TSV=$(mktemp)
-trap 'rm -f "$TOOLS_TSV" "$CLEAN_TSV"' EXIT
-grep -P '^[a-z0-9]' "$TOOLS_TSV" > "$CLEAN_TSV"
-
-# Compute column widths so the rendered Markdown table is also
-# readable as raw text.
-NAME_W=4 ; VER_W=7 ; DESC_W=11
-while IFS=$(printf '\t') read -r n v d; do
-    [ ${#n} -gt $NAME_W ] && NAME_W=${#n}
-    [ ${#v} -gt $VER_W ] && VER_W=${#v}
-    [ ${#d} -gt $DESC_W ] && DESC_W=${#d}
-done < "$CLEAN_TSV"
-
-dashes () { printf '%*s' "$1" '' | tr ' ' -; }
+. "$(dirname "$0")/lib-pangenome-pack.sh"
 
-{
-    echo "# pangenome-tools $DATE ($HASH)"
-    echo
-    echo "Singularity image: \`$(basename "$TARGET")\`"
-    echo
-    echo "Built from \`mempang-workshop\` in guix-bioinformatics @ $GB_HASH."
-    echo
-    printf "| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n" \
-           "Tool" "Version" "Description"
-    printf "| %s | %s | %s |\n" \
-           "$(dashes "$NAME_W")" "$(dashes "$VER_W")" "$(dashes "$DESC_W")"
-    while IFS=$(printf '\t') read -r n v d; do
-        printf "| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n" "$n" "$v" "$d"
-    done < "$CLEAN_TSV"
-} > "$INVENTORY"
+pangenome_pack squashfs gz.squashfs singularity
+pangenome_write_outputs
 
 echo
 echo "Singularity image ready:"
-ls -lh "$TARGET"
+ls -lh "$PACK_TARGET"
 echo "md5sum:    $MD5SUM_FILE"
 echo "inventory: $INVENTORY"
 echo
 echo "Run with:"
-echo "  singularity exec $TARGET <command>"
-echo "  singularity shell $TARGET"
+echo "  singularity exec $PACK_TARGET <command>"
+echo "  singularity shell $PACK_TARGET"
diff --git a/scripts/lib-pangenome-pack.sh b/scripts/lib-pangenome-pack.sh
new file mode 100644
index 0000000..35c50f7
--- /dev/null
+++ b/scripts/lib-pangenome-pack.sh
@@ -0,0 +1,143 @@
+# Shared helpers for the pangenome-tools image builders.
+#
+# Sourced from create-singularity-pangenome-tools.sh and
+# create-docker-pangenome-tools.sh.  Resolves versions, names the
+# output, copies the pack into ~/tmp, and writes md5sum.txt and the
+# Markdown inventory.
+#
+# Callers source this file (which assumes "$0" is the front script)
+# and then call:
+#
+#   pangenome_pack <guix-pack-format> <file-extension> <name-label>
+#   pangenome_write_outputs
+#
+# Variables PACK_TARGET, PACK_LABEL, PACK_HASH are exported back to
+# the caller after pangenome_pack runs.
+
+set -eu
+
+SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
+CHANNEL_DIR=$(cd "$SCRIPT_DIR/.." && pwd)
+DEST_DIR="$HOME/tmp"
+mkdir -p "$DEST_DIR"
+
+DATE=$(date +%Y%m%d)
+GB_HASH=$(git -C "$CHANNEL_DIR" rev-parse --short=8 HEAD)
+
+# Resolve the exact version each package contributes to the closure.
+# `guix package -A` is regex-on-name and can be ambiguous (multiple
+# wfmash variants), so go through `guix build -e ... -n` and parse
+# the store basename, which always carries the full version.
+resolve_version () {
+    expr="$1" ; name="$2"
+    path=$(guix build --no-offload -L "$CHANNEL_DIR" -e "$expr" -n 2>/dev/null \
+           | tail -n 1)
+    [ -n "$path" ] || { echo "could not resolve $name" >&2; exit 1; }
+    basename "$path" | sed -E "s/^[a-z0-9]+-${name}-//"
+}
+
+IMPG_VER=$(resolve_version '(@ (gn packages pangenome-rust) impg)' impg)
+WFMASH_VER=$(resolve_version '(@ (gn packages pangenome) wfmash-0.14-snapshot)' wfmash)
+PGGB_VER=$(resolve_version '(@ (gn packages pangenome) pggb)' pggb)
+
+VERSION_STEM="guix-bioinformatics-$GB_HASH-impg-$IMPG_VER-wfmash-$WFMASH_VER-pggb-$PGGB_VER"
+
+pangenome_pack () {
+    fmt="$1" ; ext="$2" ; label="$3"
+    echo "==> building $fmt pack from $CHANNEL_DIR"
+    extra=""
+    [ "$fmt" = "docker" ] && extra="--entry-point=/bin/bash --image-tag=pangenome-tools:$GB_HASH"
+    # shellcheck disable=SC2086
+    STORE_PATH=$(guix pack -f "$fmt" --no-offload \
+                          -L "$CHANNEL_DIR" \
+                          -S /bin=bin -S /etc/profile=etc/profile \
+                          $extra \
+                          mempang-workshop \
+                          bash coreutils grep sed gzip \
+                  | tail -n 1)
+    if [ ! -e "$STORE_PATH" ]; then
+        echo "guix pack -f $fmt did not produce a usable store path: $STORE_PATH" >&2
+        exit 1
+    fi
+    PACK_HASH=$(basename "$STORE_PATH" | cut -c1-8)
+    PACK_LABEL="$label"
+    PACK_TARGET="$DEST_DIR/pangenome-tools-$VERSION_STEM-$label-$DATE-$PACK_HASH.$ext"
+
+    echo "==> copying $STORE_PATH"
+    echo "    to $PACK_TARGET"
+    cp -L "$STORE_PATH" "$PACK_TARGET"
+    chmod u+w "$PACK_TARGET"
+}
+
+pangenome_write_outputs () {
+    # Append our line to md5sum.txt, deduping by filename so re-runs
+    # don't accumulate stale entries.
+    MD5SUM_FILE="$DEST_DIR/md5sum.txt"
+    LINE=$(cd "$DEST_DIR" && md5sum "$(basename "$PACK_TARGET")")
+    TMP=$(mktemp)
+    trap 'rm -f "$TMP"' EXIT
+    [ -f "$MD5SUM_FILE" ] && grep -v "  $(basename "$PACK_TARGET")\$" \
+                                  "$MD5SUM_FILE" > "$TMP" || true
+    printf '%s\n' "$LINE" >> "$TMP"
+    sort -k2 "$TMP" > "$MD5SUM_FILE"
+
+    # Inventory is identical regardless of pack format -- name it
+    # by channel hash + date only.
+    INVENTORY="$DEST_DIR/pangenome-tools-guix-bioinformatics-$GB_HASH-$DATE.md"
+    TOOLS_TSV=$(mktemp)
+    CLEAN_TSV=$(mktemp)
+    trap 'rm -f "$TMP" "$TOOLS_TSV" "$CLEAN_TSV"' EXIT
+    guix repl -L "$CHANNEL_DIR" -- /dev/stdin > "$TOOLS_TSV" <<'SCM'
+(use-modules (guix packages) (guix utils) (gn packages pangenome)
+             (ice-9 format) (ice-9 regex))
+;; Only keep packages defined in gn/packages/pangenome.scm or
+;; gn/packages/pangenome-rust.scm -- those are the real pangenome
+;; tools; everything else (libc, R, python, coreutils, ...) is
+;; infrastructure that ends up in the closure but isn't user-facing.
+(define pangenome-file-rx
+  (make-regexp "gn/packages/pangenome(-rust)?\\.scm$"))
+(define (pangenome-package? p)
+  (let ((loc (package-location p)))
+    (and loc (regexp-exec pangenome-file-rx (location-file loc)))))
+(define seen (make-hash-table))
+(define meta-packages
+  '("pangenomes" "mempang-workshop-pangenomes" "mempang-workshop"))
+(define (emit p)
+  (when (and (pangenome-package? p)
+             (not (member (package-name p) meta-packages))
+             (not (hash-ref seen (package-name p))))
+    (hash-set! seen (package-name p) #t)
+    (format #t "~a\t~a\t~a~%"
+            (package-name p) (package-version p)
+            (or (package-synopsis p) ""))))
+(define (expand x)
+  (let ((p (if (pair? x) (cadr x) x)))
+    (emit p)
+    (for-each expand (package-propagated-inputs p))))
+(for-each expand (package-propagated-inputs mempang-workshop))
+SCM
+    grep -P '^[a-z0-9]' "$TOOLS_TSV" > "$CLEAN_TSV"
+
+    NAME_W=4 ; VER_W=7 ; DESC_W=11
+    while IFS=$(printf '\t') read -r n v d; do
+        [ ${#n} -gt $NAME_W ] && NAME_W=${#n}
+        [ ${#v} -gt $VER_W ]  && VER_W=${#v}
+        [ ${#d} -gt $DESC_W ] && DESC_W=${#d}
+    done < "$CLEAN_TSV"
+
+    dashes () { printf '%*s' "$1" '' | tr ' ' -; }
+
+    {
+        echo "# pangenome-tools $DATE (guix-bioinformatics @ $GB_HASH)"
+        echo
+        echo "Built from \`mempang-workshop\` in guix-bioinformatics @ $GB_HASH."
+        echo
+        printf "| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n" \
+               "Tool" "Version" "Description"
+        printf "| %s | %s | %s |\n" \
+               "$(dashes "$NAME_W")" "$(dashes "$VER_W")" "$(dashes "$DESC_W")"
+        while IFS=$(printf '\t') read -r n v d; do
+            printf "| %-${NAME_W}s | %-${VER_W}s | %-${DESC_W}s |\n" "$n" "$v" "$d"
+        done < "$CLEAN_TSV"
+    } > "$INVENTORY"
+}