From e55eb5d5202f321333e7f5096059be81f6471c77 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 17 Jan 2026 12:38:00 +0100 Subject: Octopus --- .../genetics/test-pangenome-derived-genotypes.gmi | 25 +++++++++++++++++++--- topics/octopus/moosefs/moosefs-maintenance.gmi | 2 +- topics/systems/hpc/octopus-maintenance.gmi | 7 +++--- topics/systems/hpc/performance.gmi | 1 + 4 files changed, 27 insertions(+), 8 deletions(-) (limited to 'topics') diff --git a/topics/genetics/test-pangenome-derived-genotypes.gmi b/topics/genetics/test-pangenome-derived-genotypes.gmi index 3a8473a..f61e2c7 100644 --- a/topics/genetics/test-pangenome-derived-genotypes.gmi +++ b/topics/genetics/test-pangenome-derived-genotypes.gmi @@ -41,20 +41,36 @@ For mapping virtuoso contains four important ttl files: * computed QTL positions in pangenome-qtl graph * trait values in traits graph (nyi) +## Run gemma with new lmdb output ``` gemma-batch-run.sh ``` +## Convert GEMMA output to RDF + Next we convert that output to RDF with ``` ../bin/gemma-mdb-to-rdf.rb --header > output.ttl -time ../bin/gemma-mdb-to-rdf.rb --anno snps-matched.txt.mdb tmp/panlmm/*-gemma-GWA.tar.xz >> output.ttl # two hours for 7000 traits +time ../bin/gemma-mdb-to-rdf.rb --anno snps-matched.txt.mdb tmp/panlmm/*-gemma-GWA.tar.xz >> output.ttl +# 5.5 hours for 13K traits time serdi -i turtle -o ntriples output.ttl > output.n3 +real 2m22.336s +user 1m49.381s +sys 0m29.366s +SQL> SPARQL CLEAR GRAPH ; +10 min +SQL> SPARQL SELECT count(*) FROM WHERE { ?s ?p ?o } LIMIT 10; +126536658 +SQL> ld_dir('/export/data/virtuoso/ttl','pangenome-mapped-13K.n3','http://pangenome-mapped.genenetwork.org'); +SQL> rdf_loader_run (); // about 1 min per GB n3 +SQL> checkpoint; +SQL> SPARQL SELECT count(*) FROM WHERE { ?s ?p ?o } LIMIT 10; +179955246 ``` -(note that n3 files are less error prone and serdi does better than rapper with huge files) and copy the file to the virtuoso instance and load it with isql (note it may be worth search-replacing 
the gnt:run tag to something descriptive). +The conversion takes about 5.5 hours for 13K traits (note that n3 files are less error prone and serdi does better than rapper with huge files). Next, copy the file to the virtuoso instance and load it with isql (note it may be worth search-replacing the gnt:run tag to something descriptive). ``` cd /export/guix-containers/virtuoso/data/virtuoso/ttl/ @@ -148,7 +164,10 @@ See also Sort that list and save as 'pan-ids-sorted.txt'. Next run ``` -../../bin/workflow/qtl-detect-batch-run.sh +time ../../bin/workflow/qtl-detect-batch-run.sh pan-ids-sorted.txt +real 72m55.249s +user 16m31.276s +sys 3m54.365s ``` and load those in virtuoso. List new QTL diff --git a/topics/octopus/moosefs/moosefs-maintenance.gmi b/topics/octopus/moosefs/moosefs-maintenance.gmi index f0d1912..d123bd1 100644 --- a/topics/octopus/moosefs/moosefs-maintenance.gmi +++ b/topics/octopus/moosefs/moosefs-maintenance.gmi @@ -295,7 +295,7 @@ WantedBy=multi-user.target Show missing, undergoal, and overgoal chunks: ``` -mfscli -H octopus04 -P 9521 -p -SMU +mfscli -H octopus04 -P 9521 -SMU mfscli -H octopus04 -P 9521 -SIC -2 ``` diff --git a/topics/systems/hpc/octopus-maintenance.gmi b/topics/systems/hpc/octopus-maintenance.gmi index d034575..ec5607f 100644 --- a/topics/systems/hpc/octopus-maintenance.gmi +++ b/topics/systems/hpc/octopus-maintenance.gmi @@ -28,10 +28,9 @@ we have draining nodes, but no jobs running on them Reviving draining node (as root) ``` -scontrol - update NodeName=octopus05 State=DOWN Reason="undraining" - update NodeName=octopus05 State=RESUME - show node octopus05 +scontrol update NodeName=octopus05 State=DOWN Reason="undraining" +scontrol update NodeName=octopus05 State=RESUME +scontrol show node octopus05 ``` Kill time can lead to drain state diff --git a/topics/systems/hpc/performance.gmi b/topics/systems/hpc/performance.gmi index ee604b5..73ebb8e 100644 --- a/topics/systems/hpc/performance.gmi +++ b/topics/systems/hpc/performance.gmi @@ -29,6 +29,7 @@ dd 
if=./test of=/dev/zero bs=512k count=2048 + ## Networking To check the network devices installed use -- cgit 1.4.1