about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author: pjotr, 2026-05-24 09:31:51 +0000
committer: pjotr, 2026-05-29 08:59:13 +0000
commit: 2554e69813dd9ad06b2014913e15608be9d88e3d (patch)
tree: f31b4de47fd8d09e91b568915c5ce6e4b88e4b2c /scripts
parent: 1ecab04c3d9f675a0a183bfc2bb39463c19538ea (diff)
download: guix-bioinformatics-2554e69813dd9ad06b2014913e15608be9d88e3d.tar.gz
Create pangenome singularity package
Diffstat (limited to 'scripts')
-rwxr-xr-x  scripts/create-singularity-pangenome-tools.sh  32
1 file changed, 20 insertions, 12 deletions
diff --git a/scripts/create-singularity-pangenome-tools.sh b/scripts/create-singularity-pangenome-tools.sh
index f25fbbf..837f720 100755
--- a/scripts/create-singularity-pangenome-tools.sh
+++ b/scripts/create-singularity-pangenome-tools.sh
@@ -73,30 +73,38 @@ INVENTORY="$DEST_DIR/pangenome-tools-guix-bioinformatics-$GB_HASH-$DATE-$HASH.md
 TOOLS_TSV=$(mktemp)
 trap 'rm -f "$TOOLS_TSV"' EXIT
 guix repl -L "$CHANNEL_DIR" -- /dev/stdin > "$TOOLS_TSV" <<'SCM'
-(use-modules (guix packages) (gn packages pangenome) (ice-9 format))
+(use-modules (guix packages) (guix utils) (gn packages pangenome)
+             (ice-9 format) (ice-9 regex))
+;; Only keep packages defined in gn/packages/pangenome.scm or
+;; gn/packages/pangenome-rust.scm -- those are the real pangenome
+;; tools; everything else (libc, R, python, coreutils, ...) is
+;; infrastructure that ends up in the closure but isn't user-facing.
+(define pangenome-file-rx
+  (make-regexp "gn/packages/pangenome(-rust)?\\.scm$"))
+(define (pangenome-package? p)
+  (let ((loc (package-location p)))
+    (and loc
+         (regexp-exec pangenome-file-rx (location-file loc)))))
 (define seen (make-hash-table))
+(define meta-packages
+  '("pangenomes" "mempang-workshop-pangenomes" "mempang-workshop"))
 (define (emit p)
-  (unless (hash-ref seen (package-name p))
+  (when (and (pangenome-package? p)
+             (not (member (package-name p) meta-packages))
+             (not (hash-ref seen (package-name p))))
     (hash-set! seen (package-name p) #t)
     (format #t "~a\t~a\t~a~%"
             (package-name p) (package-version p)
             (or (package-synopsis p) ""))))
 (define (expand x)
   (let ((p (if (pair? x) (cadr x) x)))
-    (cond
-     ((member (package-name p)
-              '("mempang-workshop-pangenomes" "pangenomes"))
-      (for-each expand (package-propagated-inputs p)))
-     (else (emit p)))))
+    (emit p)
+    (for-each expand (package-propagated-inputs p))))
 (for-each expand (package-propagated-inputs mempang-workshop))
 SCM
 CLEAN_TSV=$(mktemp)
 trap 'rm -f "$TOOLS_TSV" "$CLEAN_TSV"' EXIT
-# Strip generic Unix utilities, compilers, and the base R/Python
-# ecosystems so the inventory only lists pangenome-relevant tools.
-EXCLUDE='^(bc|coreutils|gawk|gcc|grep|gzip|parallel|pigz|sed|wget|which|zstd|python|python-.*|r-minimal|r-.*)\t'
-grep -P '^[a-z0-9]' "$TOOLS_TSV" \
-    | grep -vP "$EXCLUDE" > "$CLEAN_TSV"
+grep -P '^[a-z0-9]' "$TOOLS_TSV" > "$CLEAN_TSV"
 
 # Compute column widths so the rendered Markdown table is also
 # readable as raw text.