From baeafc5ccc4a9893d22e6629db97720e3fa6d3ae Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 3 Dec 2023 09:47:38 -0600 Subject: Rename/move --- topics/better-logging.gmi | 29 -- topics/building-a-guix-pack-for-sharing.gmi | 30 -- topics/cd-logs.gmi | 6 - topics/ci-cd/cd-logs.gmi | 6 + topics/ci-cd/ci-rethink-slides.gmi | 28 ++ topics/ci-rethink-slides.gmi | 28 -- topics/code-antipatterns.gmi | 93 ------ topics/coding-guidelines.gmi | 16 - .../setting-up-local-development-database.gmi | 321 +++++++++++++++++++ topics/deploy/deployment.gmi | 36 +++ topics/deploy/installation.gmi | 354 +++++++++++++++++++++ topics/deploy/machines.gmi | 46 +++ topics/deploy/our-virtuoso-instances.gmi | 41 +++ topics/deploy/useful-shell-scripts.gmi | 52 +++ topics/deploy/uthsc-vpn-with-free-software.gmi | 60 ++++ topics/deploy/uthsc-vpn.scm | 44 +++ topics/deployment.gmi | 36 --- topics/developing-against-gn.gmi | 198 ------------ topics/genenetwork/developing-against-gn.gmi | 198 ++++++++++++ topics/genenetwork/phenotype-naming-convention.gmi | 33 ++ topics/genenetwork/starting_gn1.gmi | 102 ++++++ topics/genenetwork/starting_gn2_and_gn3.gmi | 52 +++ topics/genenetwork/temp-trait-submission.gmi | 11 + topics/guix/building-a-guix-pack-for-sharing.gmi | 30 ++ topics/installation.gmi | 354 --------------------- topics/machines.gmi | 46 --- topics/maybe-monad.gmi | 61 ---- topics/our-virtuoso-instances.gmi | 41 --- topics/phenotype-naming-convention.gmi | 33 -- topics/programming/better-logging.gmi | 29 ++ topics/programming/code-antipatterns.gmi | 93 ++++++ topics/programming/coding-guidelines.gmi | 16 + topics/programming/maybe-monad.gmi | 61 ++++ .../use-exceptions-to-indicate-errors.gmi | 16 + topics/setting-up-local-development-database.gmi | 321 ------------------- topics/starting_gn1.gmi | 102 ------ topics/starting_gn2_and_gn3.gmi | 52 --- topics/temp-trait-submission.gmi | 11 - topics/use-exceptions-to-indicate-errors.gmi | 16 - topics/useful-shell-scripts.gmi | 52 --- 
topics/uthsc-vpn-with-free-software.gmi | 60 ---- topics/uthsc-vpn.scm | 44 --- 42 files changed, 1629 insertions(+), 1629 deletions(-) delete mode 100644 topics/better-logging.gmi delete mode 100644 topics/building-a-guix-pack-for-sharing.gmi delete mode 100644 topics/cd-logs.gmi create mode 100644 topics/ci-cd/cd-logs.gmi create mode 100644 topics/ci-cd/ci-rethink-slides.gmi delete mode 100644 topics/ci-rethink-slides.gmi delete mode 100644 topics/code-antipatterns.gmi delete mode 100644 topics/coding-guidelines.gmi create mode 100644 topics/database/setting-up-local-development-database.gmi create mode 100644 topics/deploy/deployment.gmi create mode 100644 topics/deploy/installation.gmi create mode 100644 topics/deploy/machines.gmi create mode 100644 topics/deploy/our-virtuoso-instances.gmi create mode 100644 topics/deploy/useful-shell-scripts.gmi create mode 100644 topics/deploy/uthsc-vpn-with-free-software.gmi create mode 100644 topics/deploy/uthsc-vpn.scm delete mode 100644 topics/deployment.gmi delete mode 100644 topics/developing-against-gn.gmi create mode 100644 topics/genenetwork/developing-against-gn.gmi create mode 100644 topics/genenetwork/phenotype-naming-convention.gmi create mode 100644 topics/genenetwork/starting_gn1.gmi create mode 100644 topics/genenetwork/starting_gn2_and_gn3.gmi create mode 100644 topics/genenetwork/temp-trait-submission.gmi create mode 100644 topics/guix/building-a-guix-pack-for-sharing.gmi delete mode 100644 topics/installation.gmi delete mode 100644 topics/machines.gmi delete mode 100644 topics/maybe-monad.gmi delete mode 100644 topics/our-virtuoso-instances.gmi delete mode 100644 topics/phenotype-naming-convention.gmi create mode 100644 topics/programming/better-logging.gmi create mode 100644 topics/programming/code-antipatterns.gmi create mode 100644 topics/programming/coding-guidelines.gmi create mode 100644 topics/programming/maybe-monad.gmi create mode 100644 topics/programming/use-exceptions-to-indicate-errors.gmi 
delete mode 100644 topics/setting-up-local-development-database.gmi delete mode 100644 topics/starting_gn1.gmi delete mode 100644 topics/starting_gn2_and_gn3.gmi delete mode 100644 topics/temp-trait-submission.gmi delete mode 100644 topics/use-exceptions-to-indicate-errors.gmi delete mode 100644 topics/useful-shell-scripts.gmi delete mode 100644 topics/uthsc-vpn-with-free-software.gmi delete mode 100644 topics/uthsc-vpn.scm (limited to 'topics') diff --git a/topics/better-logging.gmi b/topics/better-logging.gmi deleted file mode 100644 index dca8c0d..0000000 --- a/topics/better-logging.gmi +++ /dev/null @@ -1,29 +0,0 @@ -# Improving Logging in GN2 - -## What Are We Trying To Solve? - -We prioritise maintaining user functionality over speed in GN [with time this speed will be improved]. As such we should be pay more attention at not breaking any currently working GN2 functionality. And when/if we do, trouble-shooting should be easy. On this front, one way is to stream-line logging in both GN2/GN3 and make it more script friendly - only report when something fails, not to instrument variables - and in so doing make the process of monitoring easier. - -## Goals - -* Have script-friendly error/info logs. -* Remove noise from GN2. -* Separate logging into different files: error logs, info logs. Add this somewhere with Flask itself instead of re-directing STDOUT to a file. - -### Non-goals - -* Logging in GN3. -* Parsing logs to extract goals. -* Getting rid of "gn.db" global object and in so doing removing "MySqlAlchemy" [that we really shouldn't be using]. -* Adding log messages to existing functions. - -## Actual Design - -* Get rid of "utility.logger" module and replace it with Flask's or Python's in-built logging. -* Configure the logging system to automatically add the module name, line number, time-stamps etc. -* Use a better format for displaying the logs. -* Delete "with Bench..." calls. 
- -## Resources - -=> https://realpython.com/python-logging/ Logging in Python diff --git a/topics/building-a-guix-pack-for-sharing.gmi b/topics/building-a-guix-pack-for-sharing.gmi deleted file mode 100644 index b82e81b..0000000 --- a/topics/building-a-guix-pack-for-sharing.gmi +++ /dev/null @@ -1,30 +0,0 @@ -# Building a ~guix pack~ for Environment Sharing - -## Motivation - -Sometimes, you might -* not have access to guix where you want to deploy -* not be able to run a guix shell (due to space constraints on the /gnu/store partition, etc) -* not be able to create a guix profile (due to space constraints on the /gnu/store partition, etc) -but you still need to have a similar environment for running your application. - -In such cases, ~guix pack~ can come in handy. - -I will use the process I used for -=> gnqc_py https://gitlab.com/fredmanglis/gnqc_py -for demonstration - -> guix pack -RR -S /mybin=bin -S /mylib=lib \ -> -L /home/frederick/genenetwork/guix-bioinformatics \ -> python redis mariadb genenetwork-qc - -* The `-RR` option ensures `guix pack` produces relocatable[1] binaries -* The `-S` adds the symbolic links to the pack[1] -* The `-L`[2] option adds the ~guix-bioinformatics~ directory to the front of the module load path - -We then add python, redis, mariadb, and genenework-qc packages (and their dependencies) to the pack. - -## Footnotes - -=> [1] https://guix.gnu.org/manual/en/html_node/Invoking-guix-pack.html -=> [2] https://www.gnu.org/software/guile/manual/guile.html#Command_002dline-Options diff --git a/topics/cd-logs.gmi b/topics/cd-logs.gmi deleted file mode 100644 index fbf0941..0000000 --- a/topics/cd-logs.gmi +++ /dev/null @@ -1,6 +0,0 @@ -# Continuous Deployment logs - -The logs of the continuously deployed development instances of genenetwork may be found at /export2/guix-containers/genenetwork-development/var/log/cd in tux02. - -This documentation may be out of date. 
So, if you can't find the logs on that path, read the container building script and look in the path that is mounted at /var/log/cd in the container. -=> https://github.com/genenetwork/genenetwork-machines/blob/main/genenetwork-development-deploy.sh diff --git a/topics/ci-cd/cd-logs.gmi b/topics/ci-cd/cd-logs.gmi new file mode 100644 index 0000000..fbf0941 --- /dev/null +++ b/topics/ci-cd/cd-logs.gmi @@ -0,0 +1,6 @@ +# Continuous Deployment logs + +The logs of the continuously deployed development instances of genenetwork may be found at /export2/guix-containers/genenetwork-development/var/log/cd in tux02. + +This documentation may be out of date. So, if you can't find the logs on that path, read the container building script and look in the path that is mounted at /var/log/cd in the container. +=> https://github.com/genenetwork/genenetwork-machines/blob/main/genenetwork-development-deploy.sh diff --git a/topics/ci-cd/ci-rethink-slides.gmi b/topics/ci-cd/ci-rethink-slides.gmi new file mode 100644 index 0000000..e8cb601 --- /dev/null +++ b/topics/ci-cd/ci-rethink-slides.gmi @@ -0,0 +1,28 @@ +# CI Rethink slides + +## Background +* guix-forge is an independent general purpose project that powers our CI +* guix-forge is built on laminar +* CI jobs are just simple scripts +* scripts are written in G-expressions, but don't have to be + +## So far +* purpose-written scripts that were executed in profiles with the necessary dependencies +* dependencies are from current Guix +* container had to be rebuilt to update dependencies +* jobs did not have access to high-level Guix features (package definitions, build systems, manifests, etc.) +* jobs only understood low-level features (profile paths, store paths, etc.) +* One advantage: job runs were fast + +## What's new? 
+* jobs have access to the full expressive power of Guix +* project repositories are channels +* jobs pull from the channels and build all packages defined therein +* job script definitions now in package definitions from channels +* container need not be rebuilt each time dependencies change +* Perfect reproducibility: Make Fred's life easier +* One disadvantage: job runs will take a while + +## Why now? +* Ludo's blog post “From development environments to continuous integration” +* realization that project repositories could be channels \ No newline at end of file diff --git a/topics/ci-rethink-slides.gmi b/topics/ci-rethink-slides.gmi deleted file mode 100644 index e8cb601..0000000 --- a/topics/ci-rethink-slides.gmi +++ /dev/null @@ -1,28 +0,0 @@ -# CI Rethink slides - -## Background -* guix-forge is an independent general purpose project that powers our CI -* guix-forge is built on laminar -* CI jobs are just simple scripts -* scripts are written in G-expressions, but don't have to be - -## So far -* purpose-written scripts that were executed in profiles with the necessary dependencies -* dependencies are from current Guix -* container had to be rebuilt to update dependencies -* jobs did not have access to high-level Guix features (package definitions, build systems, manifests, etc.) -* jobs only understood low-level features (profile paths, store paths, etc.) -* One advantage: job runs were fast - -## What's new? -* jobs have access to the full expressive power of Guix -* project repositories are channels -* jobs pull from the channels and build all packages defined therein -* job script definitions now in package definitions from channels -* container need not be rebuilt each time dependencies change -* Perfect reproducibility: Make Fred's life easier -* One disadvantage: job runs will take a while - -## Why now? 
-* Ludo's blog post “From development environments to continuous integration” -* realization that project repositories could be channels \ No newline at end of file diff --git a/topics/code-antipatterns.gmi b/topics/code-antipatterns.gmi deleted file mode 100644 index 2544451..0000000 --- a/topics/code-antipatterns.gmi +++ /dev/null @@ -1,93 +0,0 @@ -# Coding Anti-Patterns - -This document contains some anti-patterns that have either been discussed during code-reviews or noticed by some one when refactoring any of GN's code-base. Use these "ideas" as a reference, and apply reasonable judgement depending on what you are working on. - - -## [Python] Don't pass mutable types as arguments - -Consider: - -``` -from typing import List - - -_l = ['a', 'b', 'c'] - - -def add_pvalue(l: List, val: str) -> List: - l.append(val) - return l - -print(f"{_l=}\n") -print(f"{add_pvalue(_l, 'd')=}\n") -print(f"{_l=}") - -``` - -which outputs (note that _l in the global scope has changed): - -``` -_l=['a', 'b', 'c'] - -add_pvalue(_l, 'd')=['a', 'b', 'c', 'd'] - -_l=['a', 'b', 'c', 'd'] -``` - -A better fix would be: - - -``` -from typing import List - - -_l = ['a', 'b', 'c'] - - -def add_pvalue(l: List, val: str) -> List: - l = l.copy() - l.append(val) - return l - -print(f"{_l=}\n") -print(f"{add_pvalue(_l, 'd')=}\n") -print(f"{_l=}") -``` - -which now does the right thing: - -``` -_l=['a', 'b', 'c'] - -add_pvalue(_l, 'd')=['a', 'b', 'c', 'd'] - -_l=['a', 'b', 'c'] - -``` - -Best, use immutable types: - -``` -from typing import Tuple - - -_l = ['a', 'b', 'c'] - - -def add_pvalue(l: Tuple, val: str) -> Tuple: - return l + (val,) - -print(f"{_l=}\n") -print(f"{add_pvalue(tuple(_l), 'p')=}\n") -print(f"{_l=}") -``` - -which outputs: - -``` -_l=['a', 'b', 'c'] - -add_pvalue(tuple(_l), 'p')=('a', 'b', 'c', 'p') - -_l=['a', 'b', 'c'] -``` diff --git a/topics/coding-guidelines.gmi b/topics/coding-guidelines.gmi deleted file mode 100644 index 8f83ba5..0000000 --- 
a/topics/coding-guidelines.gmi +++ /dev/null @@ -1,16 +0,0 @@ -# Coding guidelines - -We aim to adhere to the following coding guidelines. - -=> /topics/use-exceptions-to-indicate-errors Exceptions, not None return values -=> /topics/maybe-monad Maybe monad, not None values -=> /topics/better-logging Log messages -=> /topics/code-antipatterns Coding Anti-Patterns -=> /topics/lisp/debugging Debugging lisp -=> /topics/lisp/common-lisp-sly Common LISP: sly -=> /topics/lisp/define-condition Common Lisp Condition Handling -=> /topics/lisp/lisp4schemers Lisp For Schemers -=> /topics/lisp/tips-and-tricks Defining a scheme sexp comment with a reader macro -=> /topics/profiling_python_code Profiling Python code - -This document is an index of other documents describing coding guidelines. Add more here as you write/discover them. diff --git a/topics/database/setting-up-local-development-database.gmi b/topics/database/setting-up-local-development-database.gmi new file mode 100644 index 0000000..3b743b9 --- /dev/null +++ b/topics/database/setting-up-local-development-database.gmi @@ -0,0 +1,321 @@ +# Setting up Local Development Database + +You need to set up a quick local database for development without polluting your environment. + +## Method 0 (tunnel) + +You can use ssh tunneling to access mysql from your machine. 
Try something like: + +``` +ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org +mysql -uwebqtlout -pwebqtlout -h 127.0.0.1 db_webqtl -A -e "show tables;" +``` + +If you already have that port in use on your machine try + +``` +ssh -L 3307:127.0.0.1:3306 -f -N tux02.genenetwork.org +mysql -uwebqtlout -pwebqtlout -h 127.0.0.1 -P 3307 db_webqtl -A -e "show tables;" +``` + +To keep the connection alive add something like this to your `~/.ssh/config` + +``` +ServerAliveInterval 60 +ServerAliveCountMax 10 +``` + +For specific hosts you can set it up as + +``` +Host tux02 + HostName tux02.genenetwork.org + TCPKeepAlive yes + ServerAliveInterval 60 + user myname +``` + +## Method 1 (Using Guix system containers) + +Setting up mariadb in a Guix container is the preferred and easier method. But, you need root access to run the container. The genenetwork2 repo comes with a guix system container definition to run MariaDB and Redis. From the genenetwork2 repo, you can build and run the container using: +``` +$ sudo $(./containers/db-container.sh) +``` +You should now be able to connect to the database using +``` +$ mysql --protocol tcp -u root +``` +Create a database db_webqtl_s +``` +MariaDB [mysql]> CREATE DATABASE db_webqtl_s; +``` +Load the small database dump into the database. You may find the small database either on space or tux02 at /home/aruni/gn2.sql.lz +``` +$ lzip -cd gn2.sql.lz | mysql --protocol tcp -u root db_webqtl_s +``` +Since this is a development server accessible only from localhost, it is ok to use the root user with no password. Configure your development instance of genenetwork2 with the following SQL_URI. +``` +SQL_URI="mysql://root@127.0.0.1:3306/db_webqtl_s" +``` + +## Method 2 (running a local container) + +A local container runs as a normal user. It requires some juggling of dirs to load an existing database. Make sure to not run on the same dirs as another mariadb instance(!) 
+Here we run a new database, but you can use an existing DB as was done in + +=> precompute-mapping-input-data.gmi + +``` +cd ~/tmp/mariadb +mkdir var +~/tmp/mariadb$ ~/opt/guix-pull/bin/guix shell -C -N coreutils sed mariadb --share=var=/var --share=/export2/tmp=/tmp +``` + +inside the container + +``` +mkdir -p /var/lib/data +mkdir -p /var/lib/mysql +mkdir /var/run +``` + +Initialize with + +``` +mysql_install_db +``` + +and run, for example + +``` +export TMPDIR=/tmp +mysqld_safe --datadir='/var/lib/mysql/' --port=3307 --user=$USER --group=users --nowatch --socket=/var/run/mysqld/mysqld.sock +``` + +Now from the container you should be able to connect with the socket + +``` +/export/mysql$ ~/opt/guix-pull/bin/guix shell mysql -- mysql --socket=var/run/mysqld/mysqld.sock -uwebqtlout -pwebqtlout db_webqtl +``` + +and + +``` +Welcome to the MariaDB monitor. Commands end with ; or \g. +Your MariaDB connection id is 3 +Server version: 10.5.12-MariaDB MariaDB Server + +Copyright (c) 2000, 2018, Oracle, MariaDB Corporation Ab and others. + +Type 'help;' or '\h' for help. Type '\c' to clear the current input statement. + +MariaDB [(none)]> show databases; ++--------------------+ +| Database | ++--------------------+ +| information_schema | +| mysql | +| performance_schema | +| test | ++--------------------+ +4 rows in set (0.001 sec) +``` + +To run/bind on a network interface we can open up with: + +``` +export TMPDIR=/tmp +mysqld_safe --datadir='/var/lib/mysql/' --port=3307 --user=$USER --group=users --nowatch --bind-address 127.0.0.1 --socket=/var/run/mysqld/mysqld.sock +``` + +Test with + + +``` + ~/opt/guix-pull/bin/guix shell mysql -- mysql -uwebqtlout -pwebqtlout db_webqtl -h 127.0.0.1 --port=3307 +``` + +Please don't use world writeable bind-address 0.0.0.0. If you run it for your own purposes use ssh tunneling instead (see above). 
+ +If you need to tweak the server configuration you can load the my.cnf file with the `--defaults-file=var/my.cnf` inside and outside the container. + +## Method 3 (Manual method without Guix) + +/This is not recommended/ + +* An assumption is made that the GeneNetwork2 profile is in ~/opt/gn_profiles/gn2_latest for the purposes of this documentation. Please replace as appropriate. +* We install the database files under ~/genenetwork/mariadb. Change as appropriate. + +Set up directories + +``` +mkdir -pv ~/genenetwork/mariadb/var/run +mkdir -pv ~/genenetwork/mariadb/var/lib/data +mkdir -pv ~/genenetwork/mariadb/var/lib/mysql +``` + +Set up default my.cnf + +``` +cat < ~/genenetwork/mariadb/my.cnf +[client-server] +socket=~/genenetwork/mariadb/var/run/mysqld/mysqld.sock +port=3307 + +[server] +user=$(whoami) +socket=~/genenetwork/mariadb/var/run/mysqld/mysqld.sock +basedir=~/opt/gn_profiles/gn2_latest +datadir=~/genenetwork/mariadb/var/lib/data +ft_min_word_len=3 +EOF +``` + +Install the database + +``` +~/opt/gn_profiles/gn2_latest/bin/mysql_install_db \ + --defaults-file=~/genenetwork/mariadb/my.cnf +``` + +Running the daemon: + +``` +~/opt/gn_profiles/gn2_latest/bin/mysqld_safe \ + --defaults-file=~/genenetwork/mariadb/my.cnf +``` + +Connect to daemon + +``` +~/opt/gn_profiles/gn2_latest/bin/mysql \ + --defaults-file=~/genenetwork/mariadb/my.cnf +``` + +Set up password for user + +``` +MariaDB [(none)]> USE mysql; +MariaDB [mysql]> ALTER USER ''@'localhost' IDENTIFIED BY ''; +MariaDB [mysql]> FLUSH PRIVILEGES; +``` + +Now logout and login again with + +``` +$ ~/opt/gn_profiles/gn2_latest/bin/mysql \ + --defaults-file=~/genenetwork/mariadb/my.cnf --password mysql +``` + +enter the newly set password and voila, you are logged in and your user has the password set up. 
+ +Now, set up a new user, say webqtlout, and a default database they can connect to + +``` +MariaDB [mysql]> CREATE DATABASE webqtlout; +MariaDB [mysql]> CREATE USER 'webqtlout'@'localhost' IDENTIFIED BY ''; +MariaDB [mysql]> GRANT ALL PRIVILEGES ON webqtlout.* TO 'webqtlout'@'localhost'; +``` + +Now logout, and log back in as the new webqtlout user: + +``` +~/opt/gn_profiles/gn2_latest/bin/mysql \ + --defaults-file=~/genenetwork/mariadb/my.cnf \ + --user=webqtlout --host=localhost --password webqtlout +``` + +and enter the password you provided. + +### Setting up the Small Database + +Download the database from + +=> http://ipfs.genenetwork.org/ipfs/QmRUmYu6ogxEdzZeE8PuXMGCDa8M3y2uFcfo4zqQRbpxtk + +Say you downloaded the file in ~/Downloads, you can now add the database to your server. + +First stop the server: + +``` +$ ps aux | grep mysqld # get the process ids +$ kill -s SIGTERM +``` + +Now extract the database archive in the mysql data directory: + +``` +$ cd ~/genenetwork/mariadb/var/lib/data +$ p7zip -k -d ~/Downloads/db_webqtl_s.7z +``` + +Now restart the server: + +``` +~/opt/gn_profiles/gn2_latest/bin/mysqld_safe \ + --defaults-file=~/genenetwork/mariadb/my.cnf +``` + +Then update the databases + +``` +$ ~/opt/gn_profiles/gn2_latest/bin/mysql_upgrade \ + --defaults-file=~/genenetwork/mariadb/my.cnf \ + --user=frederick --password --force +``` + +and login as the administrative user: + +``` +$ ~/opt/gn_profiles/gn2_latest/bin/mysql \ + --defaults-file=~/genenetwork/mariadb/my.cnf \ + --user=$(whoami) --password +``` + +and grant the privileges to your normal user: + +``` +MariaDB [mysql]> GRANT ALL PRIVILEGES ON db_webqtl_s.* TO 'webqtlout'@'localhost'; +``` + +now logout as the administrative user and log back in as the normal user + +``` +~/opt/gn_profiles/gn2_latest/bin/mysql \ + --defaults-file=~/genenetwork/mariadb/my.cnf \ + --user=webqtlout --host=localhost --password db_webqtlout_s + +MariaDB [db_webqtlout_s]> SELECT * FROM ProbeSetData LIMIT 20; 
+``` + +verify you see some data. + +### A Note on Connection to the Server + +So far, we have been connecting to the server by specifying --defaults-file option, e.g. + +``` +~/opt/gn_profiles/gn2_latest/bin/mysql \ + --defaults-file=~/genenetwork/mariadb/my.cnf \ + --user=webqtlout --host=localhost --password db_webqtlout_s +``` + +which allows connection via the unix socket. + +We could drop that specification and connect via the port with: + +``` +~/opt/gn_profiles/gn2_latest/bin/mysql \ + --user=webqtlout --host=127.0.0.1 --port=3307 --password db_webqtlout_s +``` + +In this version, the host specification was changed from +``` +--host=localhost +``` +to +``` +--host=127.0.0.1 +``` + +whereas, the **--defaults-file** file specification was dropped and a new **--port** specification was added. diff --git a/topics/deploy/deployment.gmi b/topics/deploy/deployment.gmi new file mode 100644 index 0000000..b844821 --- /dev/null +++ b/topics/deploy/deployment.gmi @@ -0,0 +1,36 @@ +# Deploy GeneNetwork + +# Description + +This page attempts to document the deployment process we have for GeneNetwork. We use Guix system containers for deployment of CI/CD and the Guix configuration for the CI/CD container should be considered the authoritative reference. + +=> https://github.com/genenetwork/genenetwork-machines/blob/main/genenetwork-development.scm + +See also + +=> ./guix-system-containers-and-how-we-use-them + +## genenetwork2 + +To install GN2 by hand for development we also track + +=> ./developing-against-gn +=> ./installation + +## genenetwork3 + +Deployment-specific settings for genenetwork3 should be written to a configuration file and the path to that file should be set in the GN3_CONF environment variable. + +Here's a typical configuration file. Please take care to customize it to your specific requirements. 
+``` +SPARQL_ENDPOINT="http://localhost:9082/sparql" +DATA_DIR="/export/data/genenetwork" +XAPIAN_DB_PATH="/export/data/genenetwork-xapian" +``` +DATA_DIR must contain two directories—genotype_files and synteny. + +## deploy guix + +See also + +=> guix-profiles.gmi diff --git a/topics/deploy/installation.gmi b/topics/deploy/installation.gmi new file mode 100644 index 0000000..ef25079 --- /dev/null +++ b/topics/deploy/installation.gmi @@ -0,0 +1,354 @@ +# Installation + +This document is WIP and still a mixture of old and new docs. + +Large system deployments can get very complex. In this document we explain the GeneNetwork reproducible deployment system which is based on GNU Guix. The Guix system can be used to install GN with all its files and dependencies. + +Note that the official deployment works through a Guix VM. This is described in + +=> ./deployment + +# Check list + +To run GeneNetwork the following services need to function: + +* [ ] GNU Guix with a guix profile for genenetwork2 +* [ ] A path to the (static) genotype files +* [?] Gn-proxy for authentication +* [ ] The genenetwork3 service +* [ ] Redis +* [ ] Mariadb + +# Installing Guix packages + +Make sure to install GNU Guix using the binary download instructions on the main website. Follow the instructions there. Note that the download amounts to several GBs of data. Debian-derived distros may support + +``` +apt-get install guix +``` + +# Creating a GNU Guix profile + +We run a GNU Guix channel with packages at + +=> https://gitlab.com/genenetwork/guix-bioinformatics + +The README has instructions for hosting a channel (recommended!), but sometimes we use the GUIX_PACKAGE_PATH instead. First upgrade to a recent guix with + +``` +mkdir ~/opt +guix pull -p ~/opt/guix-pull +``` + +It should upgrade (ignore the locales warnings). 
You can optionally specify the specific git checkout of guix with + +``` +guix pull -p ~/opt/guix-pull --commit=f04883d +``` + +which is useful when you need to roll back to an earlier version (sometimes our channel goes out of sync). Next, we install GeneNetwork2 with + +``` +source ~/opt/guix-pull/etc/profile +git clone https://git.genenetwork.org/guix-bioinformatics/guix-bioinformatics.git ~/guix-bioinformatics +``` + +you probably also need guix-past (the upstream channel for older packages): + +``` +git clone https://gitlab.inria.fr/guix-hpc/guix-past.git ~/guix-past +cd ~/guix-past +env GUIX_PACKAGE_PATH=$HOME/guix-bioinformatics:$HOME/guix-past/modules ~/opt/guix-pull/bin/guix package -i genenetwork2 -p ~/opt/genenetwork2 +``` + +Ignore the warnings. Guix should install the software without trying to build everything. If your system insists on building all packages, try the `--dry-run` switch and fix the [[https://guix.gnu.org/manual/en/html_node/Substitute-Server-Authorization.html][substitutes]]. You may add the `--substitute-urls="http://guix.genenetwork.org https://ci.guix.gnu.org https://mirror.hydra.gnu.org"` switch. + +The guix.genenetwork.org has most of our packages pre-built(!). To use it on your own machine the public key is + +``` +(public-key + (ecc + (curve Ed25519) + (q #9F56EAB5CE37AA15693C31F451140588240F259676C137E31C0CA70EC4D1B534#) + ) + ) +``` + +Once we have a GNU Guix profile, a running database (see below) and the file storage, +we should be ready to fire up GeneNetwork: + +# Running GN2 + +Check out the source with git: + +``` +git clone git@github.com:genenetwork/genenetwork2.git +cd genenetwork2 +``` + +You may want to use the testing branch. 
+ +Run GN2 with the earlier created Guix profile + +``` +export GN2_PROFILE=$HOME/opt/genenetwork2 +env TMPDIR=$HOME/tmp WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG SERVER_PORT=5012 GENENETWORK_FILES=/export/data/genenetwork/genotype_files SQL_URI=mysql://webqtlout:webqtlout@localhost/db_webqtl ./bin/genenetwork2 etc/default_settings.py -gunicorn-dev +``` + +The script comes with debug and logging switches that can be particularly useful when +developing GN2. Location and files are examples. + +It may be useful to tunnel the web server to your local browser with an ssh tunnel: + +## Testing on an ssh tunnel + +If you want to test a service running on the server on a certain port (say 8202) use + + ssh -L 8202:127.0.0.1:8202 -f -N myname@penguin2.genenetwork.org + +And browse on your local machine to http://localhost:8202/ + +# BELOW INFORMATION NEEDS TO BE UPDATED + +* Run gn-proxy + +GeneNetwork requires a separate gn-proxy server which handles +authorisation and access control. For instructions see the +[[https://github.com/genenetwork/gn-proxy][README]]. Note it may already be running on our servers! + +* Run Redis + +Redis is part of the GN2 deployment and will be started by the ./bin/genenetwork2 +startup script. + +* Run MariaDB server +** Install MariaDB with GNU Guix + +These are the steps you can take to install a fresh installation of +mariadb (which comes as part of the GNU Guix genenetwork2 install). + +As root configure the Guix profile + +: . 
~/opt/genenetwork2/etc/profile + +and run for example + +#+BEGIN_SRC bash +adduser mariadb && addgroup mariadb +mkdir -p /export2/mariadb/database +chown mariadb.mariadb -R /export2/mariadb/ +mkdir -p /var/run/mysqld +chown mariadb.mariadb /var/run/mysqld +su mariadb +mysql --version + mysql Ver 15.1 Distrib 10.1.45-MariaDB, for Linux (x86_64) using readline 5.1 +mysql_install_db --user=mariadb --datadir=/export2/mariadb/database +mysqld -u mariadb --datadir=/exportdb/mariadb/database/mariadb --explicit_defaults_for_timestamp -P 12048" +#+END_SRC + +If you want to run as root you may have to set + +: /etc/my.cnf +: [mariadbd] +: user=root + +You also need to set + +: ft_min_word_len = 3 + +To make sure word text searches (shh) work and rebuild the tables if +required. + +To check error output in a file on start-up run with something like + +: mariadbd -u mariadb --console --explicit_defaults_for_timestamp --datadir=/gnu/mariadb --log-error=~/test.log + +Other tips are that Guix installs mariadbd in your profile, so this may work + +: /home/user/.guix-profile/bin/mariadbd -u mariadb --explicit_defaults_for_timestamp --datadir=/gnu/mariadb + +When you get errors like: + +: qlalchemy.exc.IntegrityError: (_mariadb_exceptions.IntegrityError) (1215, 'Cannot add foreign key constraint') + +you may need to set + +: set foreign_key_checks=0 + +** Load the small database in MySQL + +At this point we require the underlying distribution to install and +run mysqld (see next section for GNU Guix). Currently we have two databases for deployment, +'db_webqtl_s' is the small testing database containing experiments +from BXD mice and 'db_webqtl_plant' which contains all plant related +material. 
+ +Download one database from + +http://ipfs.genenetwork.org/ipfs/QmRUmYu6ogxEdzZeE8PuXMGCDa8M3y2uFcfo4zqQRbpxtk + +After installation unzip the database binary in the MySQL directory + +#+BEGIN_SRC sh +cd ~/mysql +p7zip -d db_webqtl_s.7z +chown -R mysql:mysql db_webqtl_s/ +chmod 700 db_webqtl_s/ +chmod 660 db_webqtl_s/* +#+END_SRC + +restart MySQL service (mysqld). Login as root + +: mysql_upgrade -u root --force + +: mysql -u root + +and + +: mysql> show databases; +: +--------------------+ +: | Database | +: +--------------------+ +: | information_schema | +: | db_webqtl_s | +: | mysql | +: | performance_schema | +: +--------------------+ + +Set permissions and match password in your settings file below: + +: mysql> grant all privileges on db_webqtl_s.* to gn2@"localhost" identified by 'webqtl'; + +You may need to change "localhost" to whatever domain you are +connecting from (mysql will give an error). + +Note that if the mysql connection is not working, try connecting to +the IP address and check server firewall, hosts.allow and mysql IP +configuration (see below). + +Note for the plant database you can rename it to db_webqtl_s, or +change the settings in etc/default_settings.py to match your path. + +* Get genotype files + +The script looks for genotype files. You can find them in +http://ipfs.genenetwork.org/ipfs/QmXQy3DAUWJuYxubLHLkPMNCEVq1oV7844xWG2d1GSPFPL + +#+BEGIN_SRC sh +mkdir -p $HOME/genotype_files +cd $HOME/genotype_files + +#+END_SRC + +* GN2 Dependency Graph + +List of all runtime dependencies for GN2 as installed by GNU Guix. + +https://genenetwork.org/environments/ + +* Working with the GN2 source code + +See [[development.org]]. + +* Read more + +If you want to understand the architecture of GN2 read +[[Architecture.org]]. The rest of this document is mostly on deployment +of GN2. 
+ +* Trouble shooting + +** ImportError: No module named jinja2 + +If you have all the Guix packages installed this error points out that +the environment variables are not set. Copy-paste the paths into your +terminal (mainly so PYTHONPATH and R_LIBS_SITE are set) from the +information given by guix: + +: guix package --search-paths + +On one system: + +: export PYTHONPATH="$HOME/.guix-profile/lib/python3.8/site-packages" +: export R_LIBS_SITE="$HOME/.guix-profile/site-library/" +: export GEM_PATH="$HOME/.guix-profile/lib/ruby/gems/2.2.0" + +and perhaps a few more. +** ERROR: 'can not find directory $HOME/gn2_data' or 'can not find directory $HOME/genotype_files/genotype' + +The default settings file looks in your $HOME/gn2_data. Since these +files come with a Guix installation you should take a hint from the +values in the installed version of default_settings.py (see above in +this document). + +You can use the GENENETWORK_FILES switch to set the datadir, for example + +: env GN2_PROFILE=~/opt/gn-latest GENENETWORK_FILES=/gnu/data/gn2_data ./bin/genenetwork2 + +** Can't run a module + +In rare cases, development modules are not brought in with Guix +because no source code is available. This can lead to missing modules +on a running server. Please check with the authors when a module +is missing. +** Rpy2 error 'show' not found + +This error + +: __show = rpy2.rinterface.baseenv.get("show") +: LookupError: 'show' not found + +means that R was updated in your path, and that Rpy2 needs to be +recompiled against this R - don't you love informative messages? + +In our case it means that GN's PYTHONPATH is not in sync with +R_LIBS_SITE. Please check your GNU Guix GN2 installation paths, +you may need to reinstall. Note that this may be the point you +may want to start using profiles (see profile section).
+ +** Mysql can't connect server through socket ERROR + +The following error + +: sqlalchemy.exc.OperationalError: (_mysql_exceptions.OperationalError) (2002, 'Can\'t connect to local MySQL server through socket \'/run/mysqld/mysqld.sock\' (2 "No such file or directory")') + +means that MySQL is trying to connect locally to a non-existent MySQL +server, something you may see in a container. Typically replicated with something like + +: mysql -h localhost + +try to connect over the network interface instead, e.g. + +: mysql -h 127.0.0.1 + +if that works run genenetwork after setting SQL_URI to something like + +: export SQL_URI=mysql://gn2:mysql_password@127.0.0.1/db_webqtl_s + +* NOTES + +** Deploying GN2 official + +Let's see how fast we can deploy a second copy of GN2. + +- [ ] Base install + + [ ] First install a Debian server with GNU Guix on board + + [ ] Get Guix build going + - [ ] Build the correct version of Guix + - [ ] Check out the correct gn-stable version of guix-bioinformatics http://git.genenetwork.org/pjotrp/guix-bioinformatics + - [ ] guix package -i genenetwork2 -p /usr/local/guix-profiles/gn2-stable + + [ ] Create a gn2 user and home with space + + [ ] Install redis + - [ ] add to systemd + - [ ] update redis.cnf + - [ ] update database + + [ ] Install mariadb (currently debian mariadb-server) + - [ ] add to systemd + - [ ] system stop mysql + - [ ] update mysql.cnf + - [ ] update database (see gn-services/services/mariadb.md) + - [ ] check tables + + [ ] run gn2 + + [ ] update nginx + + [ ] install genenetwork3 + - [ ] add to systemd diff --git a/topics/deploy/machines.gmi b/topics/deploy/machines.gmi new file mode 100644 index 0000000..49c3ee6 --- /dev/null +++ b/topics/deploy/machines.gmi @@ -0,0 +1,46 @@ +# Machines + +``` +- [X] lambda01 172.23.18.212 (7c:c2:55:11:9c:ac) +- [ ] tux03i 172.23.17.181 (00:0a:f7:c1:00:8d) - 10 Gbs + [X] tux03 128.169.5.101 (00:0a:f7:c1:00:8b) - 1 Gbs +- [X] tux06 172.23.17.188 (14:23:f2:4e:29:10) +- [X] tux07 
172.23.17.191 (14:23:f2:4e:7d:60) +- [X] tux08 172.23.17.186 (14:23:f2:4f:4e:b0) +- [X] tux09 172.23.17.182 (14:23:f2:4e:49:10) +- [X] space 128.169.5.175 (e4:3d:1a:80:6c:40) +- [ ] octopus01f 172.23.18.221 (2c:ea:7f:60:bf:61) +- [ ] octopus02f 172.23.22.159 (2c:ea:7f:60:bd:61) +- [ ] octopus03f 172.23.19.187 (2c:ea:7f:60:ac:2b) +- [ ] octopus04f 172.23.17.254 (2c:ea:7f:60:af:14) +``` + +## Out-of-band access + +c for console or control + +``` +- [ ] DNS entries no longer visible +- [X] lambda01-c 172.23.17.173 (3c:ec:ef:aa:e5:50) +- [X] tux01-c 172.23.31.85 (58:8A:5A:F9:3A:22) +- [X] tux02-c 172.23.30.40 (58:8A:5A:F0:E6:E4) +- [X] tux03-c 172.23.31.87 (D0:94:66:37:39:D5) +- [X] tux04-c 172.23.18.222 (C8:4B:D6:97:03:7D) +- [X] tux05-c 172.23.22.218 (C8:4B:D6:97:05:C3) +- [X] tux06-c 172.23.18.219 (C8:4B:D6:9B:8B:3A) +- [X] tux07-c 172.23.21.85 (C8:4B:D6:9B:8B:8E) +- [X] tux08-c 172.23.17.161 (C8:4B:D6:9B:8B:76) +- [X] tux09-c 172.23.17.187 (C8:4B:D6:9B:8D:14) +- [X] space-c 172.23.31.74 (CC:48:3A:13:DB:45) +- [X] octopus01-c 172.23.31.39 (2C:EA:7F:F2:9E:10) +- [X] octopus02-c 172.23.31.37 (2C:EA:7F:F2:D8:74) +- [X] octopus03-c 172.23.31.94 (2C:EA:7F:F2:CC:34) +- [X] octopus04-c 172.23.31.38 (2C:EA:7F:F2:CF:F4) +- [X] octopus05-c 172.23.31.95 (2C:EA:7F:F2:D2:7C) +- [X] octopus06-c 172.23.31.36 (2C:EA:7F:F2:CF:A4) +- [X] octopus07-c 172.23.31.65 (2C:EA:7F:F2:D8:14) +- [X] octopus08-c 172.23.31.66 (2C:EA:7F:F2:CA:D4) +- [X] octopus09-c 172.23.31.71 (2C:EA:7F:F2:D3:74) +- [X] octopus10-c 172.23.31.33 (2C:EA:7F:F2:D2:6C) +- [X] octopus11-c 172.23.31.64 (2C:EA:7F:F2:CE:84) +``` diff --git a/topics/deploy/our-virtuoso-instances.gmi b/topics/deploy/our-virtuoso-instances.gmi new file mode 100644 index 0000000..0336018 --- /dev/null +++ b/topics/deploy/our-virtuoso-instances.gmi @@ -0,0 +1,41 @@ +# Our virtuoso instances + +We run three instances of virtuoso. 
+ +* virtuoso on tux01 for internal use by the production server +* virtuoso on tux02 in the development (aka CI/CD) container for internal use by the CD +* virtuoso on tux02 as a public SPARQL endpoint + +The public SPARQL endpoint is accessible at +=> https://sparql.genenetwork.org/sparql + +## Configuration + +All our virtuoso instances are deployed in Guix system containers. The configuration for these containers is at + +=> https://github.com/genenetwork/genenetwork-machines/blob/main/virtuoso.scm Configuration for internal production virtuoso on tux01 +=> https://github.com/genenetwork/genenetwork-machines/blob/main/genenetwork-development.scm Configuration for internal CD virtuoso on tux02 +=> https://github.com/genenetwork/genenetwork-machines/blob/main/public-sparql.scm Configuration for public SPARQL endpoint virtuoso on tux02 + +## Ports + +The ports these virtuoso instances are deployed on is subject to change, and you should really look up the Guix configuration file to find the correct port. But, here is a summary for quick lookup. Please fix this if it is inaccurate. + +### Internal production virtuoso on tux01 + +* Server port: 8891 +* SPARQL endpoint port: 8892 + +### Internal CD virtuoso on tux02 + +* Server port: 9081 +* SPARQL endpoint port: 9082 + +### Public SPARQL endpoint virtuoso on tux02 + +* Server port: 8981 +* SPARQL endpoint port: 8982 + +## Passwords + +Password based authentication is required to mutate the data in these virtuoso instances. These passwords are available with Pjotr and Arun. Please contact them if you need access. diff --git a/topics/deploy/useful-shell-scripts.gmi b/topics/deploy/useful-shell-scripts.gmi new file mode 100644 index 0000000..908928e --- /dev/null +++ b/topics/deploy/useful-shell-scripts.gmi @@ -0,0 +1,52 @@ +# Useful Shell Scripts + +I make it a habit to put all my user-scripts in `~/bin/' and add that to my `$PATH'. 
Another useful tip is to prefix all my script names with a "," - this allows me to quickly take advantage of zsh's autocomplete. For the curious, you could also adopt quiuy as part of the scripts - it has the advantage of adding more semantic meaning to your scripts. + +Most of these scripts were borrowed from: + +=> https://git.sr.ht/~whereiseveryone/dot/tree/master/item/bin + +Here are the scripts that make me more efficient on the terminal: + +* Run a script in the context of a directory: ",run-with-dir" + +``` +#!/bin/sh +set -eo pipefail + +# Run a command in specific directory +run_within_dir() { + target_dir="$1" + previous_dir=$(pwd) + shift + cd $target_dir && "$@" + cd $previous_dir +} + +run_within_dir $@ +``` + +If you are in `$HOME', you can do something like: ",run-with-dir /tmp ls" + +* Choose a guix profile on the fly: ",choose-profile": + +``` +#!/bin/env sh + +# To run this use source! + +GUIX_PROFILE="$(guix package --list-profiles | fzf --multi)" + +export GUIX_PROFILE +. "$GUIX_PROFILE/etc/profile" +``` + +* Run magit from any terminal: ",magit". You can take advantage of fzf's autocomplete + +``` +#!/bin/env sh + +emacsclient --eval "(projectile-vc \"$PWD/$@\")" +``` + +Should these scripts become too many, a repository will be created and a link added to point there. diff --git a/topics/deploy/uthsc-vpn-with-free-software.gmi b/topics/deploy/uthsc-vpn-with-free-software.gmi new file mode 100644 index 0000000..651fb83 --- /dev/null +++ b/topics/deploy/uthsc-vpn-with-free-software.gmi @@ -0,0 +1,60 @@ +# UTHSC VPN with free software + +It is possible to connect to the UTHSC VPN using only free software. For this, you need the openconnect-sso package. openconnect-sso is a wrapper around openconnect that handles the web-based single sign-on and runs openconnect with the right arguments. +=> https://github.com/vlaci/openconnect-sso/ openconnect-sso +=> https://www.infradead.org/openconnect/ openconnect + +To connect, run openconnect-sso as follows.
A browser window will pop up for you to complete the Duo authentication. Once done, you will be connected to the VPN. +``` +$ openconnect-sso --server uthscvpn1.uthsc.edu --authgroup UTHSC +``` +Note that openconnect-sso should be run as a regular user, not as root. After passing Duo authentication, openconnect-sso will try to gain root privileges to set up the network routes. At that point, it will prompt you for your password using sudo. + +## Avoid tunneling all your network traffic through the VPN (aka Split Tunneling) + +openconnect, by default, tunnels all your traffic through the VPN. This is not good for your privacy. It is better to tunnel only the traffic destined to the specific hosts that you want to access. This can be done using the vpn-slice script. +=> https://github.com/dlenski/vpn-slice/ vpn-slice + +For example, to connect to the UTHSC VPN but only access the hosts tux01 and tux02e through the VPN, run the following command. +``` +$ openconnect-sso --server uthscvpn1.uthsc.edu --authgroup UTHSC -- --script 'vpn-slice tux01 tux02e' +``` +The vpn-slice script looks up the hostnames tux01 and tux02e on the VPN DNS and adds /etc/hosts entries and routes to your system. vpn-slice can also set up more complicated routes. To learn more, read the vpn-slice documentation. + +## Unsafe legacy TLS renegotiation + +The UTHSC VPN still requires unsafe legacy TLS renegotiation. This is disabled by default on the latest Guix. We need to re-enable it by configuring openssl.cnf as described on the following stackoverflow page. +=> https://stackoverflow.com/questions/71603314/ssl-error-unsafe-legacy-renegotiation-disabled +Here's a quick summary. Put the following in some file, say /tmp/openssl.cnf +``` +openssl_conf = openssl_init + +[openssl_init] +ssl_conf = ssl_sect + +[ssl_sect] +system_default = system_default_sect + +[system_default_sect] +Options = UnsafeLegacyRenegotiation +``` +Set the environment variable OPENSSL_CONF to point to this file.
+``` +export OPENSSL_CONF=/tmp/openssl.cnf +``` +Then, run the openconnect-sso client as usual. + +## Putting it all together using Guix G-expressions + +Remembering to do all these steps is a hassle. Writing a shell script to automate this is a good idea, but why write shell scripts when we have G-expressions! Here's a G-expression script that I prepared earlier. +=> uthsc-vpn.scm +Download it, tweak the %hosts variable to specify the hosts you are interested in, and run it like so: +``` +$(guix build -f uthsc-vpn.scm) +``` + +## Acknowledgement + +Many thanks to Pjotr Prins and Erik Garrison without whose earlier work this guide would not be possible. +=> https://github.com/pjotrp/linux-at-university-of-tennessee +=> https://github.com/ekg/openconnect-sso-docker diff --git a/topics/deploy/uthsc-vpn.scm b/topics/deploy/uthsc-vpn.scm new file mode 100644 index 0000000..c714731 --- /dev/null +++ b/topics/deploy/uthsc-vpn.scm @@ -0,0 +1,44 @@ +(use-modules ((gnu packages guile-xyz) #:select (guile-ini guile-lib guile-smc)) + ((gnu packages vpn) #:select (openconnect-sso vpn-slice)) + (guix gexp)) + +;; Put in the hosts you are interested in here. +(define %hosts + (list "octopus01" + "tux01.genenetwork.org")) + +(define (ini-file name scm) + "Return a file-like object representing INI file with @var{name} and +@var{scm} data." + (computed-file name + (with-extensions (list guile-ini guile-lib guile-smc) + #~(begin + (use-modules (srfi srfi-26) + (ini)) + + (call-with-output-file #$output + (cut scm->ini #$scm #:port <>)))))) + +(define uthsc-vpn + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils)) + + (setenv "OPENSSL_CONF" + #$(ini-file "openssl.cnf" + #~'((#f + ("openssl_conf" . "openssl_init")) + ("openssl_init" + ("ssl_conf" . "ssl_sect")) + ("ssl_sect" + ("system_default" . "system_default_sect")) + ("system_default_sect" + ("Options" . 
"UnsafeLegacyRenegotiation"))))) + (invoke #$(file-append openconnect-sso "/bin/openconnect-sso") + "--server" "uthscvpn1.uthsc.edu" + "--authgroup" "UTHSC" + "--" + "--script" (string-join (cons #$(file-append vpn-slice "/bin/vpn-slice") + '#$%hosts)))))) + +(program-file "uthsc-vpn" uthsc-vpn) diff --git a/topics/deployment.gmi b/topics/deployment.gmi deleted file mode 100644 index b844821..0000000 --- a/topics/deployment.gmi +++ /dev/null @@ -1,36 +0,0 @@ -# Deploy GeneNetwork - -# Description - -This page attempts to document the deployment process we have for GeneNetwork. We use Guix system containers for deployment of CI/CD and the Guix configuration for the CI/CD container should be considered the authoritative reference. - -=> https://github.com/genenetwork/genenetwork-machines/blob/main/genenetwork-development.scm - -See also - -=> ./guix-system-containers-and-how-we-use-them - -## genenetwork2 - -To install GN2 by hand for development we also track - -=> ./developing-against-gn -=> ./installation - -## genenetwork3 - -Deployment-specific settings for genenetwork3 should be written to a configuration file and the path to that file should be set in the GN3_CONF environment variable. - -Here's a typical configuration file. Please take care to customize it to your specific requirements. -``` -SPARQL_ENDPOINT="http://localhost:9082/sparql" -DATA_DIR="/export/data/genenetwork" -XAPIAN_DB_PATH="/export/data/genenetwork-xapian" -``` -DATA_DIR must contain two directories—genotype_files and synteny. - -## deploy guix - -See also - -=> guix-profiles.gmi diff --git a/topics/developing-against-gn.gmi b/topics/developing-against-gn.gmi deleted file mode 100644 index b94b681..0000000 --- a/topics/developing-against-gn.gmi +++ /dev/null @@ -1,198 +0,0 @@ -# Developing against GeneNetwork - -## Configuration - -GeneNetwork2 comes with a [default configuration file](./etc/default_settings.py) -which can be used as a starting point. 
- -The recommended way to deal with the configurations is to **copy** this default configuration file to a location outside of the repository, say, - -```sh -.../genenetwork2$ cp etc/default_settings.py "${HOME}/configurations/gn2.py" -``` - -then change the appropriate values in the new file. You can then pass in the new -file as the configuration file when launching the application, - -```sh -.../genenetwork2$ bin/genenetwork "${HOME}/configurations/gn2.py" -``` - -The other option is to override the configurations in `etc/default_settings.py` -by setting the configuration you want to override as an environment variable e.g. -to override the `SQL_URI` value, you could do something like: - -```sh -.../genenetwork2$ env SQL_URI="mysql://:@:/" \ - bin/genenetwork "${HOME}/configurations/gn2.py" -``` - -replacing the placeholders in the angle brackets with appropriate values. - -For a detailed breakdown of the configuration variables and their use, see the -[configuration documentation](doc/configurations.org) - -## Run - -Once having installed GN2 it can be run through a browser -interface - -```sh -genenetwork2 -``` - -A quick example is - -```sh -env GN2_PROFILE=~/opt/gn-latest SERVER_PORT=5300 \ - GENENETWORK_FILES=~/data/gn2_data/ \ - GN_PROXY_URL="http://localhost:8080"\ - GN3_LOCAL_URL="http://localhost:8081"\ - SPARQL_ENDPOINT=http://localhost:8892/sparql\ - ./bin/genenetwork2 ./etc/default_settings.py -gunicorn-dev -``` - -For full examples (you may need to set a number of environment -variables), including running scripts and a Python REPL, also see the -startup script [./bin/genenetwork2](https://github.com/genenetwork/genenetwork2/blob/testing/bin/genenetwork2). - -Also mariadb and redis need to be running, see -[INSTALL](./doc/README.org). 
- -## Debugging - -To run the application under the pdb debugger, you can add the `--with-pdb` -option when launching the application, for example: - -```sh -env GN2_PROFILE=~/opt/gn-latest SERVER_PORT=5300 \ - GENENETWORK_FILES=~/data/gn2_data/ \ - GN_PROXY_URL="http://localhost:8080"\ - GN3_LOCAL_URL="http://localhost:8081"\ - SPARQL_ENDPOINT=http://localhost:8892/sparql\ - ./bin/genenetwork2 ./etc/default_settings.py --with-pdb -``` - -**NOTE**: This should only ever be run in development. -**NOTE 2**: You will probably need to tell pdb to continue at least once before -the system begins serving the pages. - -Now, you can add the `breakpoint()` call wherever you need to debug and the -terminal where you started the application with `--with-pdb` will allow you to -issue commands to pdb to debug your application. - -## Development - -It may be useful to pull in the GN3 python modules locally. For this -use `GN3_PYTHONPATH` environment that gets injected in -the ./bin/genenetwork2 startup. - -A continuously deployed instance of genenetwork2 is available at -[https://cd.genenetwork.org/](https://cd.genenetwork.org/). This -instance is redeployed on every commit provided that the [continuous -integration tests](https://ci.genenetwork.org/jobs/genenetwork2) pass. - -## Testing - -To have tests pass, the redis and mariadb instance should be running, because of -asserts sprinkled in the code base. - -Right now, the only tests running in CI are unittests. Please make -sure the existing unittests are green when submitting a PR. - -From the root directory of the repository, you can run the tests with something -like: - -```sh -env GN_PROFILE=~/opt/gn-latest SERVER_PORT=5300 \ - SQL_URI= \ - ./bin/genenetwork2 ./etc/default_settings.py \ - -c -m pytest -vv -``` - -In the case where you use the default `etc/default_settings.py` configuration file, you can override any setting as demonstrated with the `SQL_URI` setting in the command above. 
- -In order to avoid having to set up a whole host of settings every time with the `env` command, you could copy the `etc/default_settings.py` file to a new location (outside the repository is best), and pass that to `bin/genenetwork2` instead. - -See -[./bin/genenetwork2](https://github.com/genenetwork/genenetwork2/blob/testing/doc/docker-container.org) -for more details. - -#### Mechanical Rob - -We are building 'Mechanical Rob' automated testing using Python -[requests](https://github.com/genenetwork/genenetwork2/tree/testing/test/requests) -which can be run with: - -```sh -env GN2_PROFILE=~/opt/gn-latest \ - ./bin/genenetwork2 \ - GN_PROXY_URL="http://localhost:8080" \ - GN3_LOCAL_URL="http://localhost:8081 "\ - ./etc/default_settings.py -c \ - ../test/requests/test-website.py -a http://localhost:5003 -``` - -The GN2_PROFILE is the Guix profile that contains all -dependencies. The ./bin/genenetwork2 script sets up the environment -and executes test-website.py in a Python interpreter. The -a switch -says to run all tests and the URL points to the running GN2 http -server. 
- -#### Unit tests - -To run unittests, first `cd` into the genenetwork2 directory: - -```sh -# You can use the coverage tool to run the tests -# You could omit the -v which makes the output verbose -runcmd coverage run -m unittest discover -v - -# Alternatively, you could run the unittests using: -runpython -m unittest discover -v - -# To generate a report in wqflask/coverage_html_report/: -runcmd coverage html -``` - -The `runcmd` and `runpython` are shell aliases defined in the following way: - -```sh -alias runpython="env GN2_PROFILE=~/opt/gn-latest TMPDIR=/tmp SERVER_PORT=5004 GENENETWORK_FILES=/gnu/data/gn2_data/ GN_PROXY_URL="http://localhost:8080" GN3_LOCAL_URL="http://localhost:8081" ./bin/genenetwork2 - -alias runcmd="time env GN2_PROFILE=~/opt/gn-latest TMPDIR=//tmp SERVER_PORT=5004 GENENETWORK_FILES=/gnu/data/gn2_data/ GN_PROXY_URL="http://localhost:8080" GN3_LOCAL_URL="http://localhost:8081" ./bin/genenetwork2 ./etc/default_settings.py -cli" -``` - -Replace some of the env variables as per your use case. - -### Troubleshooting - -If the menu does not pop up check your `GN2_BASE_URL`. E.g. - -``` -curl http://gn2-pjotr.genenetwork.org/api/v_pre1/gen_dropdown -``` - -check the logs. If there is ERROR 1054 (42S22): Unknown column -'InbredSet.Family' in 'field list' it may be you are trying the small -database. 
- -### Run Scripts - -As part of the profiling effort, some scripts are added to run specific parts of the system under a profiler without running the entire web-server - as such, to run the script, you could do something like: - -``` -env HOME=/home/frederick \ - GN2_PROFILE=~/opt/gn2-latest \ - GN3_DEV_REPO_PATH=~/genenetwork/genenetwork3 \ - SQL_URI="mysql://username:password@host-ip:host-port/db_webqtl" \ - SPARQL_ENDPOINT=http://localhost:8892/sparql\ - SERVER_PORT=5001 \ - bin/genenetwork2 ../gn2_settings.py \ - -cli python3 -m scripts.profile_corrs \ - ../performance_$(date +"%Y%m%dT%H:%M:%S").profile -``` - -and you can find the performance metrics at the file specified, in this case, a file starting with `performance_` with the date and time of the run, and ending with `.profile`. - -Please replace the environment variables in the sample command above with the appropriate values for your environment. diff --git a/topics/genenetwork/developing-against-gn.gmi b/topics/genenetwork/developing-against-gn.gmi new file mode 100644 index 0000000..b94b681 --- /dev/null +++ b/topics/genenetwork/developing-against-gn.gmi @@ -0,0 +1,198 @@ +# Developing against GeneNetwork + +## Configuration + +GeneNetwork2 comes with a [default configuration file](./etc/default_settings.py) +which can be used as a starting point. + +The recommended way to deal with the configurations is to **copy** this default configuration file to a location outside of the repository, say, + +```sh +.../genenetwork2$ cp etc/default_settings.py "${HOME}/configurations/gn2.py" +``` + +then change the appropriate values in the new file. You can then pass in the new +file as the configuration file when launching the application, + +```sh +.../genenetwork2$ bin/genenetwork "${HOME}/configurations/gn2.py" +``` + +The other option is to override the configurations in `etc/default_settings.py` +by setting the configuration you want to override as an environment variable e.g. 
+to override the `SQL_URI` value, you could do something like: + +```sh +.../genenetwork2$ env SQL_URI="mysql://:@:/" \ + bin/genenetwork "${HOME}/configurations/gn2.py" +``` + +replacing the placeholders in the angle brackets with appropriate values. + +For a detailed breakdown of the configuration variables and their use, see the +[configuration documentation](doc/configurations.org) + +## Run + +Once having installed GN2 it can be run through a browser +interface + +```sh +genenetwork2 +``` + +A quick example is + +```sh +env GN2_PROFILE=~/opt/gn-latest SERVER_PORT=5300 \ + GENENETWORK_FILES=~/data/gn2_data/ \ + GN_PROXY_URL="http://localhost:8080"\ + GN3_LOCAL_URL="http://localhost:8081"\ + SPARQL_ENDPOINT=http://localhost:8892/sparql\ + ./bin/genenetwork2 ./etc/default_settings.py -gunicorn-dev +``` + +For full examples (you may need to set a number of environment +variables), including running scripts and a Python REPL, also see the +startup script [./bin/genenetwork2](https://github.com/genenetwork/genenetwork2/blob/testing/bin/genenetwork2). + +Also mariadb and redis need to be running, see +[INSTALL](./doc/README.org). + +## Debugging + +To run the application under the pdb debugger, you can add the `--with-pdb` +option when launching the application, for example: + +```sh +env GN2_PROFILE=~/opt/gn-latest SERVER_PORT=5300 \ + GENENETWORK_FILES=~/data/gn2_data/ \ + GN_PROXY_URL="http://localhost:8080"\ + GN3_LOCAL_URL="http://localhost:8081"\ + SPARQL_ENDPOINT=http://localhost:8892/sparql\ + ./bin/genenetwork2 ./etc/default_settings.py --with-pdb +``` + +**NOTE**: This should only ever be run in development. +**NOTE 2**: You will probably need to tell pdb to continue at least once before +the system begins serving the pages. + +Now, you can add the `breakpoint()` call wherever you need to debug and the +terminal where you started the application with `--with-pdb` will allow you to +issue commands to pdb to debug your application. 
+ +## Development + +It may be useful to pull in the GN3 python modules locally. For this +use `GN3_PYTHONPATH` environment that gets injected in +the ./bin/genenetwork2 startup. + +A continuously deployed instance of genenetwork2 is available at +[https://cd.genenetwork.org/](https://cd.genenetwork.org/). This +instance is redeployed on every commit provided that the [continuous +integration tests](https://ci.genenetwork.org/jobs/genenetwork2) pass. + +## Testing + +To have tests pass, the redis and mariadb instance should be running, because of +asserts sprinkled in the code base. + +Right now, the only tests running in CI are unittests. Please make +sure the existing unittests are green when submitting a PR. + +From the root directory of the repository, you can run the tests with something +like: + +```sh +env GN_PROFILE=~/opt/gn-latest SERVER_PORT=5300 \ + SQL_URI= \ + ./bin/genenetwork2 ./etc/default_settings.py \ + -c -m pytest -vv +``` + +In the case where you use the default `etc/default_settings.py` configuration file, you can override any setting as demonstrated with the `SQL_URI` setting in the command above. + +In order to avoid having to set up a whole host of settings every time with the `env` command, you could copy the `etc/default_settings.py` file to a new location (outside the repository is best), and pass that to `bin/genenetwork2` instead. + +See +[./bin/genenetwork2](https://github.com/genenetwork/genenetwork2/blob/testing/doc/docker-container.org) +for more details. 
+ +#### Mechanical Rob + +We are building 'Mechanical Rob' automated testing using Python +[requests](https://github.com/genenetwork/genenetwork2/tree/testing/test/requests) +which can be run with: + +```sh +env GN2_PROFILE=~/opt/gn-latest \ + ./bin/genenetwork2 \ + GN_PROXY_URL="http://localhost:8080" \ + GN3_LOCAL_URL="http://localhost:8081 "\ + ./etc/default_settings.py -c \ + ../test/requests/test-website.py -a http://localhost:5003 +``` + +The GN2_PROFILE is the Guix profile that contains all +dependencies. The ./bin/genenetwork2 script sets up the environment +and executes test-website.py in a Python interpreter. The -a switch +says to run all tests and the URL points to the running GN2 http +server. + +#### Unit tests + +To run unittests, first `cd` into the genenetwork2 directory: + +```sh +# You can use the coverage tool to run the tests +# You could omit the -v which makes the output verbose +runcmd coverage run -m unittest discover -v + +# Alternatively, you could run the unittests using: +runpython -m unittest discover -v + +# To generate a report in wqflask/coverage_html_report/: +runcmd coverage html +``` + +The `runcmd` and `runpython` are shell aliases defined in the following way: + +```sh +alias runpython="env GN2_PROFILE=~/opt/gn-latest TMPDIR=/tmp SERVER_PORT=5004 GENENETWORK_FILES=/gnu/data/gn2_data/ GN_PROXY_URL="http://localhost:8080" GN3_LOCAL_URL="http://localhost:8081" ./bin/genenetwork2 + +alias runcmd="time env GN2_PROFILE=~/opt/gn-latest TMPDIR=//tmp SERVER_PORT=5004 GENENETWORK_FILES=/gnu/data/gn2_data/ GN_PROXY_URL="http://localhost:8080" GN3_LOCAL_URL="http://localhost:8081" ./bin/genenetwork2 ./etc/default_settings.py -cli" +``` + +Replace some of the env variables as per your use case. + +### Troubleshooting + +If the menu does not pop up check your `GN2_BASE_URL`. E.g. + +``` +curl http://gn2-pjotr.genenetwork.org/api/v_pre1/gen_dropdown +``` + +check the logs. 
If there is ERROR 1054 (42S22): Unknown column +'InbredSet.Family' in 'field list' it may be you are trying the small +database. + +### Run Scripts + +As part of the profiling effort, some scripts are added to run specific parts of the system under a profiler without running the entire web-server - as such, to run the script, you could do something like: + +``` +env HOME=/home/frederick \ + GN2_PROFILE=~/opt/gn2-latest \ + GN3_DEV_REPO_PATH=~/genenetwork/genenetwork3 \ + SQL_URI="mysql://username:password@host-ip:host-port/db_webqtl" \ + SPARQL_ENDPOINT=http://localhost:8892/sparql\ + SERVER_PORT=5001 \ + bin/genenetwork2 ../gn2_settings.py \ + -cli python3 -m scripts.profile_corrs \ + ../performance_$(date +"%Y%m%dT%H:%M:%S").profile +``` + +and you can find the performance metrics at the file specified, in this case, a file starting with `performance_` with the date and time of the run, and ending with `.profile`. + +Please replace the environment variables in the sample command above with the appropriate values for your environment. diff --git a/topics/genenetwork/phenotype-naming-convention.gmi b/topics/genenetwork/phenotype-naming-convention.gmi new file mode 100644 index 0000000..dd7583f --- /dev/null +++ b/topics/genenetwork/phenotype-naming-convention.gmi @@ -0,0 +1,33 @@ +# Phenotype Naming Conventions + +In our phenotype data entry in GeneNetwork we have two fields for users to enter abbreviations of their phenotypes - abbreviation before publication and abbreviation after publication. The former must have a value but can be cryptic such as EJC_Trait749. But the latter abbreviation - which MUST be entered at the same time - is the permanent abbreviation to be used in graphs and figures. + +Many of these abbreviations are getting way way too long to be useful on graphs and plots.
The painful reality is that there is almost no rhyme or reason to the format of these abbreviations because we have bad curation: + +* ymaze_SponAlt_12m_NtgBXD_Males +* Barnesiella_genus_HFD_log10_fraction +* OTU_12_CD_log10_fraction +* HW_BW_Male_16_months_and_older +* Log2Fold_vs_CTL_IL6_M_CORT_PFC +* Complex motor Learning +* M_CONSTRICT +* F_LD_TRANSITIONS +* LOC OFLD 20-25 +* Cnt_AdrWts +* Hbidm + +Since we have a second-generation curation tool in progress, it would be great to apply some formal reasoning and formatting conventions to our phenotype descriptions at a higher level. We can build a system that begs or demands that the user follow a particular structure when BUILDING up their abbreviations for their study. For example, we might ask users to use the following conventions for age and sex of cases + +* "_M6-8m" for males 6 to 8 months of age +* "_F>24m" for females older than 24 months, +* "_MF6-8d" for both males and females at 6 to 8 days of age + +1. First we need to impose a limit of 15 characters for true graph-compatible abbreviations. The main purpose of abbreviations is to add labels to graphs and figures. Even 15 characters may be too long, but we can truncate middle characters and just keep the first and last 5 characters if we need to be brutal. We can also allow a "Wordy Abbreviation" or the "Data Owner's Laboratory Style Abbreviation". + +2. Our GN abbreviations must be unique within a particular study but not necessarily across studies. But "across study" is a problem if we have *BW_M_6m* as the body weight of males at 6 months for 6 or more publications. Then we may need to programmatically add further tags such as year of publication (last two digits). + +3. We have to decide on a format that WE IMPOSE. For better or for worse, we are apparently one of the major curators for formats for phenotype abbreviations. Perhaps we need to formalize this with the Phenome Database team.
+ +Given the above concerns, the real way to think about metadata is descriptive RDF. I.e. separate terms for species, breed, trait, individual. It is fine to come up with identifiers that look descriptive, but they really should not be more than identifiers. Our current practice of parsing identifiers for 'logic' is very fragile and therefore a bad idea. + +There are better ways to do computable semantics; we have some need for “pretty” abbreviations but these are not required to be unique and must be useable on charts so we constrain the length and usually include uid. We are still able to do the curation for mouse traits, so you can access. diff --git a/topics/genenetwork/starting_gn1.gmi b/topics/genenetwork/starting_gn1.gmi new file mode 100644 index 0000000..efbfd0f --- /dev/null +++ b/topics/genenetwork/starting_gn1.gmi @@ -0,0 +1,102 @@ +# Starting GN1 + +The GN1 repos are at + +=> https://github.com/genenetwork/genenetwork1 + +Branches are: + +* master: my main branch - used in pjotr-test +* lily: running but almost discontinued +* production: on tux01 + +Note that there are some hard coded paths/IPs - so simply merging is not a great idea. + +On tux01 GN1 is running inside a Guix container. + +Start a screen and run the guix deploy script. See the README file in + +gn1@tux01:~/production/gnshare/gn + +# Guix + +At this point GN1 is fixed at Feb 2021: + +guix: 1.2.0-12.dffc918 +guix-past: 159be3d7e86e1f22b2b7b1efc938ed63120dc973 +guix-bioinformatics: 697a66bf0e897a101e8e3cefbaf250491039fe93 + +# Building + +On an update of guix the build may fail. 
Try + +``` +~/opt/guix-gn1/bin/guix build \ + -L /home/gn1/guix-past/modules/ \ + -L /home/gn1/guix-bioinformatics/ \ + genenetwork1 +``` + +## Updating mariadb connection on lily + +``` + restart apache in lily + [root@lily base]# /etc/init.d/httpd restart + Stopping httpd: [ OK ] + Starting httpd: [ OK ] + [root@lily base]# pwd + /gnshare/gn/web/webqtl/base + [root@lily base]# + /gnshare/gn/web/webqtl/base/webqtlConfigLocal.py + #######################################' + # Environment Variables - private + ######################################### + # sql_host = '[1]tux02.uthsc.edu' + # sql_host = '128.169.4.67' + sql_host = '172.23.18.213' + SERVERNAME = sql_host + MYSQL_SERVER = sql_host + DB_NAME = 'db_webqtl' + DB_USER = 'x' + DB_PASSWD = 'x' + MYSQL_UPDSERVER = sql_host + DB_UPDNAME = 'db_webqtl' + DB_UPDUSER = 'x' + DB_UPDPASSWD = 'x' + GNROOT = '/gnshare/gn/' + PythonPath = '/usr/bin/python' + PIDDLE_FONT_PATH = + '/usr/lib/python2.4/site-packages/piddle/truetypefonts/' +``` + +SQL may also need to be updated here: + +=> /gnshare/gn/web/webqtl/base/webqtlConfigLocal.py +=> /gnshare/gn/web/infoshare/includes/config.html +=> /gnshare/gn/web/infoshare/includes/db.inc + +## Updating from lily + +Git sync + +``` +gn1@tux01:~/production/gnshare/gn-pjotr-test$ +git checkout lily +git pull pjotr@lily.genenetwork.org:/gnshare/gn/ +``` + +Menu sync + +``` +gn1@tux01:~/production/gnshare/gn-pjotr-test$ +scp pjotr@lily.genenetwork.org:/gnshare/gn/web/javascript/*.js web/javascript/ +git status +``` + +## Updating httpd.conf + +To update the httpd.conf you need to edit the system file in guix-bioinformatics. 
It can be built with + +``` +guix build -L ~/guix-past/modules/ -L ~/guix-bioinformatics/ -e '(@ (gn services gn1-httpd-config) GN1-httpd-config)' +``` diff --git a/topics/genenetwork/starting_gn2_and_gn3.gmi b/topics/genenetwork/starting_gn2_and_gn3.gmi new file mode 100644 index 0000000..1cfed14 --- /dev/null +++ b/topics/genenetwork/starting_gn2_and_gn3.gmi @@ -0,0 +1,52 @@ +# How to Start GN2 and GN3 + +This document describes in short how we run GN2 and GN3 on the current production setup. + +Note that we should replace this with a system container. + +This details how GN2/GN3 production are currently started. It's probably a good idea to create a shell script for starting GN3 like we have for GN2 at some point, since currently environment variables are set manually. + +See also + +=> systems/gn-services.gmi + +# GN3 + +GN2 depends on GN3 for REST services and libraries. + +## Environment + +Set PATH/PYTHONPATH/GN2_PROFILE environment variables + +Example: + +``` +export GN2_PROFILE=/home/zas1024/opt/gn-latest-20221206 +export PATH=$GN2_PROFILE/bin:$PATH +export PYTHONPATH="$GN2_PROFILE/lib/python3.9/site-packages" +``` + +## Start development on port 8081 + +Start GN3 from the relevant directory + +``` +env FLASK_DEBUG=1 FLASK_APP="main.py" CORS_ORIGINS="http://gn2-zach.genenetwork.org:*,https://gn2-zach.genenetwork.org:*,http://genenetwork.org:*,https://genenetwork.org:*" flask run --port=8081 +``` + +GN3 has a settings.py file now. See the README. + +## Start production on port 8087 + +``` +gn2@tux01: +cd ~/gn3_production/genenetwork3 +gunicorn --bind 0.0.0.0:8087 --workers 8 --keep-alive 6000 --max-requests 10 --max-requests-jitter 5 --timeout 1200 wsgi:app +``` + +Note I had to comment out some oauth stuff on the latest. + + +# GN2 + +1. 
Just run /home/gn2/production/run_production.sh diff --git a/topics/genenetwork/temp-trait-submission.gmi b/topics/genenetwork/temp-trait-submission.gmi new file mode 100644 index 0000000..7029e2a --- /dev/null +++ b/topics/genenetwork/temp-trait-submission.gmi @@ -0,0 +1,11 @@ +# How to Submit a Temp trait (for testing purposes or otherwise) + +1. Click Submit Trait under the Intro dropdown in the header bar + +2. Select the species and group you want to submit for from the dropdowns under Step 1 (I just use Mouse/BXD for testing) + +3. Navigate to the following GN1 link to get a sample list of trait values (so in this case navigate down to BXD) - https://gn1.genenetwork.org/RIsample.html + +4. Copy and paste those values into the Step 2 text area + +5. Click Submit Trait (which should then take you to a trait page with the submitted sample values) diff --git a/topics/guix/building-a-guix-pack-for-sharing.gmi b/topics/guix/building-a-guix-pack-for-sharing.gmi new file mode 100644 index 0000000..b82e81b --- /dev/null +++ b/topics/guix/building-a-guix-pack-for-sharing.gmi @@ -0,0 +1,30 @@ +# Building a ~guix pack~ for Environment Sharing + +## Motivation + +Sometimes, you might +* not have access to guix where you want to deploy +* not be able to run a guix shell (due to space constraints on the /gnu/store partition, etc) +* not be able to create a guix profile (due to space constraints on the /gnu/store partition, etc) +but you still need to have a similar environment for running your application. + +In such cases, ~guix pack~ can come in handy. 
+ +I will use the process I used for +=> https://gitlab.com/fredmanglis/gnqc_py gnqc_py +for demonstration + +> guix pack -RR -S /mybin=bin -S /mylib=lib \ +> -L /home/frederick/genenetwork/guix-bioinformatics \ +> python redis mariadb genenetwork-qc + +* The `-RR` option ensures `guix pack` produces relocatable[1] binaries +* The `-S` adds the symbolic links to the pack[1] +* The `-L`[2] option adds the ~guix-bioinformatics~ directory to the front of the module load path + +We then add python, redis, mariadb, and genenetwork-qc packages (and their dependencies) to the pack. + +## Footnotes + +=> https://guix.gnu.org/manual/en/html_node/Invoking-guix-pack.html [1] +=> https://www.gnu.org/software/guile/manual/guile.html#Command_002dline-Options [2] diff --git a/topics/installation.gmi b/topics/installation.gmi deleted file mode 100644 index ef25079..0000000 --- a/topics/installation.gmi +++ /dev/null @@ -1,354 +0,0 @@ -# Installation - -This document is WIP and still a mixture of old and new docs. - -Large system deployments can get very complex. In this document we explain the GeneNetwork reproducible deployment system which is based on GNU Guix The Guix system can be used to install GN with all its files and dependencies. - -Note that the official deployment works through a Guix VM. This is described in - -=> ./deployment - -# Check list - -To run GeneNetwork the following services need to function: - -* [ ] GNU Guix with a guix profile for genenetwork2 -* [ ] A path to the (static) genotype files -* [?] Gn-proxy for authentication -* [ ] The genenetwork3 service -* [ ] Redis -* [ ] Mariadb - -# Installing Guix packages - -Make sure to install GNU Guix using the binary download instructions on the main website. Follow the instructions on Note the download amounts to several GBs of data. 
Debian-derived distros may support - -``` -apt-get install guix -``` - -# Creating a GNU Guix profile - -We run a GNU Guix channel with packages at - -=> https://gitlab.com/genenetwork/guix-bioinformatics - -The README has instructions hosting a channel (recommended!), but sometimes we use the GUIX_PACKAGE_PATH instead. First upgrade to a recent guix with - -``` -mkdir ~/opt -guix pull -p ~/opt/guix-pull -``` - -It should upgrade (ignore the locales warnings). You can optionally specify the specific git checkout of guix with - -``` -guix pull -p ~/opt/guix-pull --commit=f04883d -``` - -which is useful when you need to roll back to an earlier version (sometimes our channel goes out of sync). Next, we install GeneNetwork2 with - -``` -source ~/opt/guix-pull/etc/profile -git clone https://git.genenetwork.org/guix-bioinformatics/guix-bioinformatics.git ~/guix-bioinformatics -``` - -you probably also need guix-past (the upstream channel for older packages): - -``` -git clone https://gitlab.inria.fr/guix-hpc/guix-past.git ~/guix-past -cd ~/guix-past -env GUIX_PACKAGE_PATH=$HOME/guix-bioinformatics:$HOME/guix-past/modules ~/opt/guix-pull/bin/guix package -i genenetwork2 -p ~/opt/genenetwork2 -``` - -Ignore the warnings. Guix should install the software without trying to build everything. If you system insists on building all packages, try the `--dry-run` switch and fix the [[https://guix.gnu.org/manual/en/html_node/Substitute-Server-Authorization.html][substitutes]]. You may add the `--substitute-urls="http://guix.genenetwork.org https://ci.guix.gnu.org https://mirror.hydra.gnu.org"` switch. - -The guix.genenetwork.org has most of our packages pre-built(!). 
To use it on your own machine the public key is - -``` -(public-key - (ecc - (curve Ed25519) - (q #9F56EAB5CE37AA15693C31F451140588240F259676C137E31C0CA70EC4D1B534#) - ) - ) -``` - -Once we have a GNU Guix profile, a running database (see below) and the file storage, -we should be ready to fire up GeneNetwork: - -# Running GN2 - -Check out the source with git: - -``` -git clone git@github.com:genenetwork/genenetwork2.git -cd genenetwork2 -``` - -You may want to use the testing branch. - -Run GN2 with earlier created Guix profile - -``` -export GN2_PROFILE=$HOME/opt/genenetwork2 -env TMPDIR=$HOME/tmp WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG SERVER_PORT=5012 GENENETWORK_FILES=/export/data/genenetwork/genotype_files SQL_URI=mysql://webqtlout:webqtlout@localhost/db_webqtl ./bin/genenetwork2 etc/default_settings.py -gunicorn-dev -``` - -The script comes with debug and logging switches can be particularly useful when -developing GN2. Location and files are examples. - -It may be useful to tunnel the web server to your local browser with an ssh tunnel: - -## Testing on an ssh tunnnel - -If you want to test a service running on the server on a certain port (say 8202) use - - ssh -L 8202:127.0.0.1:8202 -f -N myname@penguin2.genenetwork.org - -And browse on your local machine to http://localhost:8202/ - -# BELOW INFORMATION NEEDS TO BE UPDATED - -* Run gn-proxy - -GeneNetwork requires a separate gn-proxy server which handles -authorisation and access control. For instructions see the -[[https://github.com/genenetwork/gn-proxy][README]]. Note it may already be running on our servers! - -* Run Redis - -Redis part of GN2 deployment and will be started by the ./bin/genenetwork2 -startup script. - -* Run MariaDB server -** Install MariaDB with GNU GUIx - -These are the steps you can take to install a fresh installation of -mariadb (which comes as part of the GNU Guix genenetwork2 install). - -As root configure the Guix profile - -: . 
~/opt/genenetwork2/etc/profile - -and run for example - -#+BEGIN_SRC bash -adduser mariadb && addgroup mariadb -mkdir -p /export2/mariadb/database -chown mariadb.mariadb -R /export2/mariadb/ -mkdir -p /var/run/mysqld -chown mariadb.mariadb /var/run/mysqld -su mariadb -mysql --version - mysql Ver 15.1 Distrib 10.1.45-MariaDB, for Linux (x86_64) using readline 5.1 -mysql_install_db --user=mariadb --datadir=/export2/mariadb/database -mysqld -u mariadb --datadir=/exportdb/mariadb/database/mariadb --explicit_defaults_for_timestamp -P 12048" -#+END_SRC - -If you want to run as root you may have to set - -: /etc/my.cnf -: [mariadbd] -: user=root - -You also need to set - -: ft_min_word_len = 3 - -To make sure word text searches (shh) work and rebuild the tables if -required. - -To check error output in a file on start-up run with something like - -: mariadbd -u mariadb --console --explicit_defaults_for_timestamp --datadir=/gnu/mariadb --log-error=~/test.log - -Other tips are that Guix installs mariadbd in your profile, so this may work - -: /home/user/.guix-profile/bin/mariadbd -u mariadb --explicit_defaults_for_timestamp --datadir=/gnu/mariadb - -When you get errors like: - -: qlalchemy.exc.IntegrityError: (_mariadb_exceptions.IntegrityError) (1215, 'Cannot add foreign key constraint') - -you may need to set - -: set foreign_key_checks=0 - -** Load the small database in MySQL - -At this point we require the underlying distribution to install and -run mysqld (see next section for GNU Guix). Currently we have two databases for deployment, -'db_webqtl_s' is the small testing database containing experiments -from BXD mice and 'db_webqtl_plant' which contains all plant related -material. 
- -Download one database from - -http://ipfs.genenetwork.org/ipfs/QmRUmYu6ogxEdzZeE8PuXMGCDa8M3y2uFcfo4zqQRbpxtk - -After installation unzip the database binary in the MySQL directory - -#+BEGIN_SRC sh -cd ~/mysql -p7zip -d db_webqtl_s.7z -chown -R mysql:mysql db_webqtl_s/ -chmod 700 db_webqtl_s/ -chmod 660 db_webqtl_s/* -#+END_SRC - -restart MySQL service (mysqld). Login as root - -: mysql_upgrade -u root --force - -: myslq -u root - -and - -: mysql> show databases; -: +--------------------+ -: | Database | -: +--------------------+ -: | information_schema | -: | db_webqtl_s | -: | mysql | -: | performance_schema | -: +--------------------+ - -Set permissions and match password in your settings file below: - -: mysql> grant all privileges on db_webqtl_s.* to gn2@"localhost" identified by 'webqtl'; - -You may need to change "localhost" to whatever domain you are -connecting from (mysql will give an error). - -Note that if the mysql connection is not working, try connecting to -the IP address and check server firewall, hosts.allow and mysql IP -configuration (see below). - -Note for the plant database you can rename it to db_webqtl_s, or -change the settings in etc/default_settings.py to match your path. - -* Get genotype files - -The script looks for genotype files. You can find them in -http://ipfs.genenetwork.org/ipfs/QmXQy3DAUWJuYxubLHLkPMNCEVq1oV7844xWG2d1GSPFPL - -#+BEGIN_SRC sh -mkdir -p $HOME/genotype_files -cd $HOME/genotype_files - -#+END_SRC - -* GN2 Dependency Graph - -List of all runtime dependencies for GN2 as installed by GNU Guix. - -https://genenetwork.org/environments/ - -* Working with the GN2 source code - -See [[development.org]]. - -* Read more - -If you want to understand the architecture of GN2 read -[[Architecture.org]]. The rest of this document is mostly on deployment -of GN2. 
- -* Trouble shooting - -** ImportError: No module named jinja2 - -If you have all the Guix packages installed this error points out that -the environment variables are not set. Copy-paste the paths into your -terminal (mainly so PYTHON_PATH and R_LIBS_SITE are set) from the -information given by guix: - -: guix package --search-paths - -On one system: - -: export PYTHONPATH="$HOME/.guix-profile/lib/python3.8/site-packages" -: export R_LIBS_SITE="$HOME/.guix-profile/site-library/" -: export GEM_PATH="$HOME/.guix-profile/lib/ruby/gems/2.2.0" - -and perhaps a few more. -** ERROR: 'can not find directory $HOME/gn2_data' or 'can not find directory $HOME/genotype_files/genotype' - -The default settings file looks in your $HOME/gn2_data. Since these -files come with a Guix installation you should take a hint from the -values in the installed version of default_settings.py (see above in -this document). - -You can use the GENENETWORK_FILES switch to set the datadir, for example - -: env GN2_PROFILE=~/opt/gn-latest GENENETWORK_FILES=/gnu/data/gn2_data ./bin/genenetwork2 - -** Can't run a module - -In rare cases, development modules are not brought in with Guix -because no source code is available. This can lead to missing modules -on a running server. Please check with the authors when a module -is missing. -** Rpy2 error 'show' now found - -This error - -: __show = rpy2.rinterface.baseenv.get("show") -: LookupError: 'show' not found - -means that R was updated in your path, and that Rpy2 needs to be -recompiled against this R - don't you love informative messages? - -In our case it means that GN's PYTHONPATH is not in sync with -R_LIBS_SITE. Please check your GNU Guix GN2 installation paths, -you man need to reinstall. Note that this may be the point you -may want to start using profiles (see profile section). 
- -** Mysql can't connect server through socket ERROR - -The following error - -: sqlalchemy.exc.OperationalError: (_mysql_exceptions.OperationalError) (2002, 'Can\'t connect to local MySQL server through socket \'/run/mysqld/mysqld.sock\' (2 "No such file or directory")') - -means that MySQL is trying to connect locally to a non-existent MySQL -server, something you may see in a container. Typically replicated with something like - -: mysql -h localhost - -try to connect over the network interface instead, e.g. - -: mysql -h 127.0.0.1 - -if that works run genenetwork after setting SQL_URI to something like - -: export SQL_URI=mysql://gn2:mysql_password@127.0.0.1/db_webqtl_s - -* NOTES - -** Deploying GN2 official - -Let's see how fast we can deploy a second copy of GN2. - -- [ ] Base install - + [ ] First install a Debian server with GNU Guix on board - + [ ] Get Guix build going - - [ ] Build the correct version of Guix - - [ ] Check out the correct gn-stable version of guix-bioinformatics http://git.genenetwork.org/pjotrp/guix-bioinformatics - - [ ] guix package -i genenetwork2 -p /usr/local/guix-profiles/gn2-stable - + [ ] Create a gn2 user and home with space - + [ ] Install redis - - [ ] add to systemd - - [ ] update redis.cnf - - [ ] update database - + [ ] Install mariadb (currently debian mariadb-server) - - [ ] add to systemd - - [ ] system stop mysql - - [ ] update mysql.cnf - - [ ] update database (see gn-services/services/mariadb.md) - - [ ] check tables - + [ ] run gn2 - + [ ] update nginx - + [ ] install genenetwork3 - - [ ] add to systemd diff --git a/topics/machines.gmi b/topics/machines.gmi deleted file mode 100644 index 49c3ee6..0000000 --- a/topics/machines.gmi +++ /dev/null @@ -1,46 +0,0 @@ -# Machines - -``` -- [X] lambda01 172.23.18.212 (7c:c2:55:11:9c:ac) -- [ ] tux03i 172.23.17.181 (00:0a:f7:c1:00:8d) - 10 Gbs - [X] tux03 128.169.5.101 (00:0a:f7:c1:00:8b) - 1 Gbs -- [X] tux06 172.23.17.188 (14:23:f2:4e:29:10) -- [X] tux07 172.23.17.191 
(14:23:f2:4e:7d:60) -- [X] tux08 172.23.17.186 (14:23:f2:4f:4e:b0) -- [X] tux09 172.23.17.182 (14:23:f2:4e:49:10) -- [X] space 128.169.5.175 (e4:3d:1a:80:6c:40) -- [ ] octopus01f 172.23.18.221 (2c:ea:7f:60:bf:61) -- [ ] octopus02f 172.23.22.159 (2c:ea:7f:60:bd:61) -- [ ] octopus03f 172.23.19.187 (2c:ea:7f:60:ac:2b) -- [ ] octopus04f 172.23.17.254 (2c:ea:7f:60:af:14) -``` - -## Out-of-band access - -c for console or control - -``` -- [ ] DNS entries no longer visible -- [X] lambda01-c 172.23.17.173 (3c:ec:ef:aa:e5:50) -- [X] tux01-c 172.23.31.85 (58:8A:5A:F9:3A:22) -- [X] tux02-c 172.23.30.40 (58:8A:5A:F0:E6:E4) -- [X] tux03-c 172.23.31.87 (D0:94:66:37:39:D5) -- [X] tux04-c 172.23.18.222 (C8:4B:D6:97:03:7D) -- [X] tux05-c 172.23.22.218 (C8:4B:D6:97:05:C3) -- [X] tux06-c 172.23.18.219 (C8:4B:D6:9B:8B:3A) -- [X] tux07-c 172.23.21.85 (C8:4B:D6:9B:8B:8E) -- [X] tux08-c 172.23.17.161 (C8:4B:D6:9B:8B:76) -- [X] tux09-c 172.23.17.187 (C8:4B:D6:9B:8D:14) -- [X] space-c 172.23.31.74 (CC:48:3A:13:DB:45) -- [X] octopus01-c 172.23.31.39 (2C:EA:7F:F2:9E:10) -- [X] octopus02-c 172.23.31.37 (2C:EA:7F:F2:D8:74) -- [X] octopus03-c 172.23.31.94 (2C:EA:7F:F2:CC:34) -- [X] octopus04-c 172.23.31.38 (2C:EA:7F:F2:CF:F4) -- [X] octopus05-c 172.23.31.95 (2C:EA:7F:F2:D2:7C) -- [X] octopus06-c 172.23.31.36 (2C:EA:7F:F2:CF:A4) -- [X] octopus07-c 172.23.31.65 (2C:EA:7F:F2:D8:14) -- [X] octopus08-c 172.23.31.66 (2C:EA:7F:F2:CA:D4) -- [X] octopus09-c 172.23.31.71 (2C:EA:7F:F2:D3:74) -- [X] octopus10-c 172.23.31.33 (2C:EA:7F:F2:D2:6C) -- [X] octopus11-c 172.23.31.64 (2C:EA:7F:F2:CE:84) -``` diff --git a/topics/maybe-monad.gmi b/topics/maybe-monad.gmi deleted file mode 100644 index d6f87f2..0000000 --- a/topics/maybe-monad.gmi +++ /dev/null @@ -1,61 +0,0 @@ -# Maybe monad - -None values are values that represent the absence of a value. This leads to a proliferation of conditionals and special cases in the code, and is a terrible way to represent the absence of a value. We need something better. 
Enter the maybe monad. - -For a detailed case against None values, read -=> https://www.lucidchart.com/techblog/2015/08/31/the-worst-mistake-of-computer-science/ - -Consider the following code snippet where we print a value if it is not None. -``` -def print_unless_none(x): - if x is not None: - print(x) - -foo = 1 -bar = None -print_unless_none(foo) -print_unless_none(bar) -``` - -Rewriting the same code using the maybe monad, we can avoid the conditional check making the code more concise and more robust against bugs. -``` -from pymonad.maybe import Just, Nothing - -foo = Just(1) -bar = Nothing -foo.bind(print) -bar.bind(print) -``` - -Monads may also be passed through a chain of function calls without any condition checking in between. If foo were Nothing, the entire sequence of operations would be skipped with no error raised. Notice how this is much cleaner than interleaving the code with if conditions checking for None intermediate values. -``` -foo = Just(1) -foo.map(lambda x: 1 + x) \ - .map(lambda x: x**2) \ - .bind(print) -``` - -Finally, let's put all this together in a practical example using sql_query_mdict from genenetwork. Consider the following code using the DictCursor. The column foo may contain NULL values, and we need to check for them. -``` -with database_connection() as conn: - with conn.cursor(MySQLdb.cursors.DictCursor) as cursor: - cursor.execute("SELECT foo FROM bar") - for row in cursor.fetchall(): - if row["foo"] is not None: - print(row["foo"]) -``` -But, with sql_query_mdict, the row object is a MonadictDict where all values are monadic. We therefore do not need any special conditional checks. -``` -with database_connection() as conn: - for row in sql_query_mdict(conn, "SELECT foo FROM bar"): - row["foo"].bind(print) -``` -As a bonus, sql_query_mdict also gets rid of cursors by returning a generator and letting us iterate over it pythonically. 
- -## Useful Resources - -=> https://www.miguelfarrajota.com/2021/06/monads-in-python-with-pymonad/ - -=> https://jasondelaat.github.io/pymonad_docs/explanations/whats-a-monad.html - -=> https://simon.tournier.info/posts/2021-02-03-monad.html diff --git a/topics/our-virtuoso-instances.gmi b/topics/our-virtuoso-instances.gmi deleted file mode 100644 index 0336018..0000000 --- a/topics/our-virtuoso-instances.gmi +++ /dev/null @@ -1,41 +0,0 @@ -# Our virtuoso instances - -We run three instances of virtuoso. - -* virtuoso on tux01 for internal use by the production server -* virtuoso on tux02 in the development (aka CI/CD) container for internal use by the CD -* virtuoso on tux02 as a public SPARQL endpoint - -The public SPARQL endpoint is accessible at -=> https://sparql.genenetwork.org/sparql - -## Configuration - -All our virtuoso instances are deployed in Guix system containers. The configuration for these containers is at - -=> https://github.com/genenetwork/genenetwork-machines/blob/main/virtuoso.scm Configuration for internal production virtuoso on tux01 -=> https://github.com/genenetwork/genenetwork-machines/blob/main/genenetwork-development.scm Configuration for internal CD virtuoso on tux02 -=> https://github.com/genenetwork/genenetwork-machines/blob/main/public-sparql.scm Configuration for public SPARQL endpoint virtuoso on tux02 - -## Ports - -The ports these virtuoso instances are deployed on is subject to change, and you should really look up the Guix configuration file to find the correct port. But, here is a summary for quick lookup. Please fix this if it is inaccurate. 
- -### Internal production virtuoso on tux01 - -* Server port: 8891 -* SPARQL endpoint port: 8892 - -### Internal CD virtuoso on tux02 - -* Server port: 9081 -* SPARQL endpoint port: 9082 - -### Public SPARQL endpoint virtuoso on tux02 - -* Server port: 8981 -* SPARQL endpoint port: 8982 - -## Passwords - -Password based authentication is required to mutate the data in these virtuoso instances. These passwords are available with Pjotr and Arun. Please contact them if you need access. diff --git a/topics/phenotype-naming-convention.gmi b/topics/phenotype-naming-convention.gmi deleted file mode 100644 index dd7583f..0000000 --- a/topics/phenotype-naming-convention.gmi +++ /dev/null @@ -1,33 +0,0 @@ -# Phenotype Naming Conventions - -In our phenotype data entry in GeneNetwork we have two fields for users to enter abbreviations of their phenotypes - abbreviation before publication and abbreviation after publication. The former must have value but can be cryptic such as EJC_Trait749. But the later abbreviation - which MUST be entered at the same time - is the permanent abbreviation to be used in graphs and figures. - -Many of these abbreviations are getting way way too long to be useful on graphs and plots. The painful reality is that there is almost no rhyme or reason to the format of these abbreviations because we have bad curation: - -* ymaze_SponAlt_12m_NtgBXD_Males -* Barnesiella_genus_HFD_log10_fraction -* OTU_12_CD_log10_fraction -* HW_BW_Male_16_months_and_older -* Log2Fold_vs_CTL_IL6_M_CORT_PFC -* Complex motor Learning -* M_CONSTRICT -* F_LD_TRANSITIONS -* LOC OFLD 20-25 -* Cnt_AdrWts -* Hbidm - -Since we have a second-generation curation tool in progress, it would be great to apply some formal reasoning and formatting conventions to our phenotype descriptions at a higher level. We can build a system that begs or demands that the use follow a particular structure on BUILDING up their abbreviations for their study. 
For example, we might ask users to use the following conventions for age and sex of cases - -* "_M6-8m" for males 6 to 8 months of age -* "_F>24m" for females older than 24 months, -* "_MF6-8d" for both males and females at 6 to 8 days of age - -1. First we need to impose a limit of 15 characters for true graph-compatible abbreviations. The main purpose of abbreviations is to add labels to graphs and figures. Even 15 characters may be too long, but we can truncate middle characters and just keep the first and last 5 characters if we need to be brutal. We can also allow a "Wordy Abbreviation" or the "Data Owner's Laboratory Style Abbreviation". - -2. Our GN abbreviations must be unique within a particular study but not necessarily across studies. But "across study" is a problem if we have *BW_M_6m* as the body weight of males at 6 months for 6 or more publications. Then we may need to programmatically add further tags such as year of publication (last two digits). - -3. We have to decide on a format that WE IMPOSE. For better or for worse, we are apparently one of the major curators for formats for phenotype abbreviations. Perhaps we need to formalize this with the Phenome Database team. - -Given the above concerns, the real way to think about metadata is descriptive RDF. I.e. separate terms for species, breed, trait, individual. It is fine to come up with identifiers that look descriptive, but they really should not be more than identifiers. Our current practice of parsing identifiers for 'logic' is very fragile and therefore a bad idea. - -There are better ways to do computable semantics; we have some need for “pretty” abbreviations but these are not required to be unique and must be useable on charts so we constrain the length and usually include uid. We are still able to do the curation for mouse traits, so you can access. 
diff --git a/topics/programming/better-logging.gmi b/topics/programming/better-logging.gmi new file mode 100644 index 0000000..dca8c0d --- /dev/null +++ b/topics/programming/better-logging.gmi @@ -0,0 +1,29 @@ +# Improving Logging in GN2 + +## What Are We Trying To Solve? + +We prioritise maintaining user functionality over speed in GN [with time this speed will be improved]. As such we should pay more attention to not breaking any currently working GN2 functionality. And when/if we do, trouble-shooting should be easy. On this front, one way is to stream-line logging in both GN2/GN3 and make it more script friendly - only report when something fails, not to instrument variables - and in so doing make the process of monitoring easier. + +## Goals + +* Have script-friendly error/info logs. +* Remove noise from GN2. +* Separate logging into different files: error logs, info logs. Add this somewhere with Flask itself instead of re-directing STDOUT to a file. + +### Non-goals + +* Logging in GN3. +* Parsing logs to extract goals. +* Getting rid of "gn.db" global object and in so doing removing "MySqlAlchemy" [that we really shouldn't be using]. +* Adding log messages to existing functions. + +## Actual Design + +* Get rid of "utility.logger" module and replace it with Flask's or Python's in-built logging. +* Configure the logging system to automatically add the module name, line number, time-stamps etc. +* Use a better format for displaying the logs. +* Delete "with Bench..." calls. + +## Resources + +=> https://realpython.com/python-logging/ Logging in Python diff --git a/topics/programming/code-antipatterns.gmi b/topics/programming/code-antipatterns.gmi new file mode 100644 index 0000000..2544451 --- /dev/null +++ b/topics/programming/code-antipatterns.gmi @@ -0,0 +1,93 @@ +# Coding Anti-Patterns + +This document contains some anti-patterns that have either been discussed during code-reviews or noticed by someone when refactoring any of GN's code-base. 
Use these "ideas" as a reference, and apply reasonable judgement depending on what you are working on. + + +## [Python] Don't pass mutable types as arguments + +Consider: + +``` +from typing import List + + +_l = ['a', 'b', 'c'] + + +def add_pvalue(l: List, val: str) -> List: + l.append(val) + return l + +print(f"{_l=}\n") +print(f"{add_pvalue(_l, 'd')=}\n") +print(f"{_l=}") + +``` + +which outputs (note that _l in the global scope has changed): + +``` +_l=['a', 'b', 'c'] + +add_pvalue(_l, 'd')=['a', 'b', 'c', 'd'] + +_l=['a', 'b', 'c', 'd'] +``` + +A better fix would be: + + +``` +from typing import List + + +_l = ['a', 'b', 'c'] + + +def add_pvalue(l: List, val: str) -> List: + l = l.copy() + l.append(val) + return l + +print(f"{_l=}\n") +print(f"{add_pvalue(_l, 'd')=}\n") +print(f"{_l=}") +``` + +which now does the right thing: + +``` +_l=['a', 'b', 'c'] + +add_pvalue(_l, 'd')=['a', 'b', 'c', 'd'] + +_l=['a', 'b', 'c'] + +``` + +Best, use immutable types: + +``` +from typing import Tuple + + +_l = ['a', 'b', 'c'] + + +def add_pvalue(l: Tuple, val: str) -> Tuple: + return l + (val,) + +print(f"{_l=}\n") +print(f"{add_pvalue(tuple(_l), 'p')=}\n") +print(f"{_l=}") +``` + +which outputs: + +``` +_l=['a', 'b', 'c'] + +add_pvalue(tuple(_l), 'p')=('a', 'b', 'c', 'p') + +_l=['a', 'b', 'c'] +``` diff --git a/topics/programming/coding-guidelines.gmi b/topics/programming/coding-guidelines.gmi new file mode 100644 index 0000000..8f83ba5 --- /dev/null +++ b/topics/programming/coding-guidelines.gmi @@ -0,0 +1,16 @@ +# Coding guidelines + +We aim to adhere to the following coding guidelines. 
+ +=> /topics/programming/use-exceptions-to-indicate-errors Exceptions, not None return values +=> /topics/programming/maybe-monad Maybe monad, not None values +=> /topics/programming/better-logging Log messages +=> /topics/programming/code-antipatterns Coding Anti-Patterns +=> /topics/lisp/debugging Debugging lisp +=> /topics/lisp/common-lisp-sly Common LISP: sly +=> /topics/lisp/define-condition Common Lisp Condition Handling +=> /topics/lisp/lisp4schemers Lisp For Schemers +=> /topics/lisp/tips-and-tricks Defining a scheme sexp comment with a reader macro +=> /topics/profiling_python_code Profiling Python code + +This document is an index of other documents describing coding guidelines. Add more here as you write/discover them. diff --git a/topics/programming/maybe-monad.gmi b/topics/programming/maybe-monad.gmi new file mode 100644 index 0000000..d6f87f2 --- /dev/null +++ b/topics/programming/maybe-monad.gmi @@ -0,0 +1,61 @@ +# Maybe monad + +None values are values that represent the absence of a value. This leads to a proliferation of conditionals and special cases in the code, and is a terrible way to represent the absence of a value. We need something better. Enter the maybe monad. + +For a detailed case against None values, read +=> https://www.lucidchart.com/techblog/2015/08/31/the-worst-mistake-of-computer-science/ + +Consider the following code snippet where we print a value if it is not None. +``` +def print_unless_none(x): + if x is not None: + print(x) + +foo = 1 +bar = None +print_unless_none(foo) +print_unless_none(bar) +``` + +Rewriting the same code using the maybe monad, we can avoid the conditional check making the code more concise and more robust against bugs. +``` +from pymonad.maybe import Just, Nothing + +foo = Just(1) +bar = Nothing +foo.bind(print) +bar.bind(print) +``` + +Monads may also be passed through a chain of function calls without any condition checking in between. If foo were Nothing, the entire sequence of operations would be skipped with no error raised.
Notice how this is much cleaner than interleaving the code with if conditions checking for None intermediate values. +``` +foo = Just(1) +foo.map(lambda x: 1 + x) \ + .map(lambda x: x**2) \ + .bind(print) +``` + +Finally, let's put all this together in a practical example using sql_query_mdict from genenetwork. Consider the following code using the DictCursor. The column foo may contain NULL values, and we need to check for them. +``` +with database_connection() as conn: + with conn.cursor(MySQLdb.cursors.DictCursor) as cursor: + cursor.execute("SELECT foo FROM bar") + for row in cursor.fetchall(): + if row["foo"] is not None: + print(row["foo"]) +``` +But, with sql_query_mdict, the row object is a MonadicDict where all values are monadic. We therefore do not need any special conditional checks. +``` +with database_connection() as conn: + for row in sql_query_mdict(conn, "SELECT foo FROM bar"): + row["foo"].bind(print) +``` +As a bonus, sql_query_mdict also gets rid of cursors by returning a generator and letting us iterate over it pythonically. + +## Useful Resources + +=> https://www.miguelfarrajota.com/2021/06/monads-in-python-with-pymonad/ + +=> https://jasondelaat.github.io/pymonad_docs/explanations/whats-a-monad.html + +=> https://simon.tournier.info/posts/2021-02-03-monad.html diff --git a/topics/programming/use-exceptions-to-indicate-errors.gmi b/topics/programming/use-exceptions-to-indicate-errors.gmi new file mode 100644 index 0000000..e302dd3 --- /dev/null +++ b/topics/programming/use-exceptions-to-indicate-errors.gmi @@ -0,0 +1,16 @@ +# Use exceptions to indicate errors + +Often, we indicate that a function has encountered an error by returning a None value. Here's why this is a bad idea and why you should use exceptions instead.
+ +When we return None values to indicate errors, we have to take care to check the return value of every function call and propagate errors higher and higher up the function call stack until we reach a point where the error is handled. This clutters up the code, and is one reason why writing correct code in languages like C that don't have exceptions is a pain. + +With exceptions, we only have to create an exception handler (try/except block in Python) at the highest level. Any exceptions raised by functions below that level are automatically passed on to the except block with no additional programmer effort. + +Here's an example where we run mapping, and if there's an error, we return an error page. Else, we return the results page. Notice that we do not check the return value template_vars. +``` +try: + template_vars = run_mapping.RunMapping(start_vars, temp_uuid) + return render_template("mapping_results.html", **template_vars) +except: + return render_template("mapping_error.html") +``` diff --git a/topics/setting-up-local-development-database.gmi b/topics/setting-up-local-development-database.gmi deleted file mode 100644 index 3b743b9..0000000 --- a/topics/setting-up-local-development-database.gmi +++ /dev/null @@ -1,321 +0,0 @@ -# Setting up Local Development Database - -You need to set up a quick local database for development without polluting your environment. - -## Method 0 (tunnel) - -You can use ssh tunneling to access mysql from your machine.
Try something like: - -``` -ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org -mysql -uwebqtlout -pwebqtlout -h 127.0.0.1 db_webqtl -A -e "show tables;" -``` - -If you already have that port in use on your machine try - -``` -ssh -L 3307:127.0.0.1:3306 -f -N tux02.genenetwork.org -mysql -uwebqtlout -pwebqtlout -h 127.0.0.1 -P 3307 db_webqtl -A -e "show tables;" -``` - -To keep the connection alive add something like this to your `~/.ssh/config` - -``` -ServerAliveInterval 60 -ServerAliveCountMax 10 -``` - -For specific hosts you can set it up as - -``` -Host tux02 - HostName tux02.genenetwork.org - TCPKeepAlive yes - ServerAliveInterval 60 - user myname -``` - -## Method 1 (Using Guix system containers) - -Setting up mariadb in a Guix container is the preferred and easier method. But, you need root access to run the container. The genenetwork2 repo comes with a guix system container definition to run MariaDB and Redis. From the genenetwork2 repo, you can build and run the container using: -``` -$ sudo $(./containers/db-container.sh) -``` -You should now be able to connect to the database using -``` -$ mysql --protocol tcp -u root -``` -Create a database db_webqtl_s -``` -MariaDB [mysql]> CREATE DATABASE db_webqtl_s; -``` -Load the small database dump into the database. You may find the small database either on space or tux02 at /home/aruni/gn2.sql.lz -``` -$ lzip -cd gn2.sql.lz | mysql --protocol tcp -u root db_webqtl_s -``` -Since this is a develpoment server accessible only from localhost, it is ok to use the root user with no password. Configure your development instance of genenetwork2 with the following SQL_URI. -``` -SQL_URI="mysql://root@127.0.0.1:3306/db_webqtl_s" -``` - -## Method 2 (running a local container) - -A local container runs as a normal user. It requires some juggling of dirs to load an existing database. Make sure to not run on the same dirs as another mariadb instance(!) 
-Here we run a new database, but you can use an existing DB as was done in - -=> precompute-mapping-input-data.gmi - -``` -cd ~/tmp/mariadb -mkdir var -~/tmp/mariadb$ ~/opt/guix-pull/bin/guix shell -C -N coreutils sed mariadb --share=var=/var --share=/export2/tmp=/tmp -``` - -inside the container - -``` -mkdir -p /var/lib/data -mkdir -p /var/lib/mysql -mkdir /var/run -``` - -Initialize with - -``` -mysql_install_db -``` - -and run, for example - -``` -export TMPDIR=/tmp -mysqld_safe --datadir='/var/lib/mysql/' --port=3307 --user=$USER --group=users --nowatch --socket=/var/run/mysqld/mysqld.sock -``` - -Now from the container you should be able to connect with the socket - -``` -/export/mysql$ ~/opt/guix-pull/bin/guix shell mysql -- mysql --socket=var/run/mysqld/mysqld.sock -uwebqtlout -pwebqtlout db_webqtl -``` - -and - -``` -Welcome to the MariaDB monitor. Commands end with ; or \g. -Your MariaDB connection id is 3 -Server version: 10.5.12-MariaDB MariaDB Server - -Copyright (c) 2000, 2018, Oracle, MariaDB Corporation Ab and others. - -Type 'help;' or '\h' for help. Type '\c' to clear the current input statement. - -MariaDB [(none)]> show databases; -+--------------------+ -| Database | -+--------------------+ -| information_schema | -| mysql | -| performance_schema | -| test | -+--------------------+ -4 rows in set (0.001 sec) -``` - -To run/bind on a network interface we can open up with: - -``` -export TMPDIR=/tmp -mysqld_safe --datadir='/var/lib/mysql/' --port=3307 --user=$USER --group=users --nowatch --bind-address 127.0.0.1 --socket=/var/run/mysqld/mysqld.sock -``` - -Test with - - -``` - ~/opt/guix-pull/bin/guix shell mysql -- mysql -uwebqtlout -pwebqtlout db_webqtl -h 127.0.0.1 --port=3307 -``` - -Please don't use world writeable bind-address 0.0.0.0. If you run it for your own purposes use ssh tunneling instead (see above). 
- -If you need to tweak the server configuration you can load the my.cnf file with the `--defaults-file=var/my.cnf` inside and outside the container. - -## Method 3 (Manual method without Guix) - -/This is not recommended/ - -* An assumption is made that the GeneNetwork2 profile is in ~/opt/gn_profiles/gn2_latest for the purposes of this documentation. Please replace as appropriate. -* We install the database files under ~/genenetwork/mariadb. Change as appropriate. - -Set up directories - -``` -mkdir -pv ~/genenetwork/mariadb/var/run -mkdir -pv ~/genenetwork/mariadb/var/lib/data -mkdir -pv ~/genenetwork/mariadb/var/lib/mysql -``` - -Set up default my.cnf - -``` -cat < ~/genenetwork/mariadb/my.cnf -[client-server] -socket=~/genenetwork/mariadb/var/run/mysqld/mysqld.sock -port=3307 - -[server] -user=$(whoami) -socket=~/genenetwork/mariadb/var/run/mysqld/mysqld.sock -basedir=~/opt/gn_profiles/gn2_latest -datadir=~/genenetwork/mariadb/var/lib/data -ft_min_word_len=3 -EOF -``` - -Install the database - -``` -~/opt/gn_profiles/gn2_latest/bin/mysql_install_db \ - --defaults-file=~/genenetwork/mariadb/my.cnf -``` - -Running the daemon: - -``` -~/opt/gn_profiles/gn2_latest/bin/mysqld_safe \ - --defaults-file=~/genenetwork/mariadb/my.cnf -``` - -Connect to daemon - -``` -~/opt/gn_profiles/gn2_latest/bin/mysql \ - --defaults-file=~/genenetwork/mariadb/my.cnf -``` - -Set up password for user - -``` -MariaDB [(none)]> USE mysql; -MariaDB [mysql]> ALTER USER ''@'localhost' IDENTIFIED BY ''; -MariaDB [mysql]> FLUSH PRIVILEGES; -``` - -Now logout and login again with - -``` -$ ~/opt/gn_profiles/gn2_latest/bin/mysql \ - --defaults-file=~/genenetwork/mariadb/my.cnf --password mysql -``` - -enter the newly set password and voila, you are logged in and your user has the password set up. 
- -Now, set up a new user, say webqtlout, and a default database they can connect to - -``` -MariaDB [mysql]> CREATE DATABASE webqtlout; -MariaDB [mysql]> CREATE USER 'webqtlout'@'localhost' IDENTIFIED BY ''; -MariaDB [mysql]> GRANT ALL PRIVILEGES ON webqtlout.* TO 'webqtlout'@'localhost'; -``` - -Now logout, and log back in as the new webqtlout user: - -``` -~/opt/gn_profiles/gn2_latest/bin/mysql \ - --defaults-file=~/genenetwork/mariadb/my.cnf \ - --user=webqtlout --host=localhost --password webqtlout -``` - -and enter the password you provided. - -### Setting up the Small Database - -Download the database from - -=> http://ipfs.genenetwork.org/ipfs/QmRUmYu6ogxEdzZeE8PuXMGCDa8M3y2uFcfo4zqQRbpxtk - -Say you downloaded the file in ~/Downloads, you can now add the database to your server. - -First stop the server: - -``` -$ ps aux | grep mysqld # get the process ids -$ kill -s SIGTERM -``` - -Now extract the database archive in the mysql data directory: - -``` -$ cd ~/genenetwork/mariadb/var/lib/data -$ p7zip -k -d ~/Downloads/db_webqtl_s.7z -``` - -Now restart the server: - -``` -~/opt/gn_profiles/gn2_latest/bin/mysqld_safe \ - --defaults-file=~/genenetwork/mariadb/my.cnf -``` - -Then update the databases - -``` -$ ~/opt/gn_profiles/gn2_latest/bin/mysql_upgrade \ - --defaults-file=~/genenetwork/mariadb/my.cnf \ - --user=frederick --password --force -``` - -and login as the administrative user: - -``` -$ ~/opt/gn_profiles/gn2_latest/bin/mysql \ - --defaults-file=~/genenetwork/mariadb/my.cnf \ - --user=$(whoami) --password -``` - -and grant the privileges to your normal user: - -``` -MariaDB [mysql]> GRANT ALL PRIVILEGES ON db_webqtl_s.* TO 'webqtlout'@'localhost'; -``` - -now logout as the administrative user and log back in as the normal user - -``` -~/opt/gn_profiles/gn2_latest/bin/mysql \ - --defaults-file=~/genenetwork/mariadb/my.cnf \ - --user=webqtlout --host=localhost --password db_webqtlout_s - -MariaDB [db_webqtlout_s]> SELECT * FROM ProbeSetData LIMIT 20; 
-``` - -verify you see some data. - -### A Note on Connection to the Server - -So far, we have been connecting to the server by specifying --defaults-file option, e.g. - -``` -~/opt/gn_profiles/gn2_latest/bin/mysql \ - --defaults-file=~/genenetwork/mariadb/my.cnf \ - --user=webqtlout --host=localhost --password db_webqtlout_s -``` - -which allows connection via the unix socket. - -We could drop that specification and connect via the port with: - -``` -~/opt/gn_profiles/gn2_latest/bin/mysql \ - --user=webqtlout --host=127.0.0.1 --port=3307 --password db_webqtlout_s -``` - -In this version, the host specification was changed from -``` ---host=localhost -``` -to -``` ---host=127.0.0.1 -``` - -whereas, the **--defaults-file** file specification was dropped and a new **--port** specification was added. diff --git a/topics/starting_gn1.gmi b/topics/starting_gn1.gmi deleted file mode 100644 index efbfd0f..0000000 --- a/topics/starting_gn1.gmi +++ /dev/null @@ -1,102 +0,0 @@ -# Starting GN1 - -The GN1 repos are at - -=> https://github.com/genenetwork/genenetwork1 - -Branches are: - -* master: my main branch - used in pjotr-test -* lily: running but almost discontinued -* production: on tux01 - -Note that there are some hard coded paths/IPs - so simply merging is not a great idea. - -On tux01 GN1 is running inside a Guix container. - -Start a screen and run the guix deploy script. See the README file in - -gn1@tux01:~/production/gnshare/gn - -# Guix - -At this point GN1 is fixed at Feb 2021: - -guix: 1.2.0-12.dffc918 -guix-past: 159be3d7e86e1f22b2b7b1efc938ed63120dc973 -guix-bioinformatics: 697a66bf0e897a101e8e3cefbaf250491039fe93 - -# Building - -On an update of guix the build may fail. 
Try - -``` -~/opt/guix-gn1/bin/guix build - -L /home/gn1/guix-past/modules/ \ - -L /home/gn1/guix-bioinformatics/ \ - genenetwork1 -``` - -## Updating mariadb connection on lily - -``` - restart apache in lily - [root@lily base]# /etc/init.d/httpd restart - Stopping httpd: [ OK ] - Starting httpd: [ OK ] - [root@lily base]# pwd - /gnshare/gn/web/webqtl/base - [root@lily base]# - /gnshare/gn/web/webqtl/base/webqtlConfigLocal.py - #######################################' - # Environment Variables - private - ######################################### - # sql_host = '[1]tux02.uthsc.edu' - # sql_host = '128.169.4.67' - sql_host = '172.23.18.213' - SERVERNAME = sql_host - MYSQL_SERVER = sql_host - DB_NAME = 'db_webqtl' - DB_USER = 'x' - DB_PASSWD = 'x' - MYSQL_UPDSERVER = sql_host - DB_UPDNAME = 'db_webqtl' - DB_UPDUSER = 'x' - DB_UPDPASSWD = 'x' - GNROOT = '/gnshare/gn/' - PythonPath = '/usr/bin/python' - PIDDLE_FONT_PATH = - '/usr/lib/python2.4/site-packages/piddle/truetypefonts/' -``` - -SQL may also need to be updated here: - -=> /gnshare/gn/web/webqtl/base/webqtlConfigLocal.py -=> /gnshare/gn/web/infoshare/includes/config.html -=> /gnshare/gn/web/infoshare/includes/db.inc - -## Updating from lily - -Git sync - -``` -gn1@tux01:~/production/gnshare/gn-pjotr-test$ -git checkout lily -git pull pjotr@lily.genenetwork.org:/gnshare/gn/ -``` - -Menu sync - -``` -gn1@tux01:~/production/gnshare/gn-pjotr-test$ -scp pjotr@lily.genenetwork.org:/gnshare/gn/web/javascript/*.js web/javascript/ -git status -``` - -## Updating httpd.conf - -To update the httpd.conf you need to edit the system file in guix-bioinformatics. 
It can be built with - -``` -guix build -L ~/guix-past/modules/ -L ~/guix-bioinformatics/ -e '(@ (gn services gn1-httpd-config) GN1-httpd-config)' -``` diff --git a/topics/starting_gn2_and_gn3.gmi b/topics/starting_gn2_and_gn3.gmi deleted file mode 100644 index 1cfed14..0000000 --- a/topics/starting_gn2_and_gn3.gmi +++ /dev/null @@ -1,52 +0,0 @@ -# How to Start GN2 and GN3 - -This document describes in a short how we run GN2 and GN3 on the current production setup. - -Note that we should replace this with a system container. - -This details how GN2/GN3 production are currently started. It's probably a good idea to create a shell script for starting GN3 like we have for GN2 at some point, since currently environment variables are set manually. - -See also - -=> systems/gn-services.gmi - -# GN3 - -GN2 depends on GN3 for REST services and libraries. - -## Environment - -Set PATH/PYTHONPATH/GN2_PROFILE environment variables - -Example: - -``` -export GN2_PROFILE=/home/zas1024/opt/gn-latest-20221206 -export PATH=$GN2_PROFILE/bin:$PATH -export PYTHONPATH="$GN2_PROFILE/lib/python3.9/site-packages" -``` - -## Start development on port 8081 - -Start GN3 from the relevant directory - -``` -env FLASK_DEBUG=1 FLASK_APP="main.py" CORS_ORIGINS="http://gn2-zach.genenetwork.org:*,https://gn2-zach.genenetwork.org:*,http://genenetwork.org:*,https://genenetwork.org:*" flask run --port=8081 -``` - -GN3 has a settings.py file now. See the README. - -## Start production on port 8087 - -``` -gn2@tux01: -cd ~/gn3_production/genenetwork3 -gunicorn --bind 0.0.0.0:8087 --workers 8 --keep-alive 6000 --max-requests 10 --max-requests-jitter 5 --timeout 1200 wsgi:app -``` - -Note I had to comment out some oauth stuff on the latest. - - -# GN2 - -1. 
Just run /home/gn2/production/run_production.sh diff --git a/topics/temp-trait-submission.gmi b/topics/temp-trait-submission.gmi deleted file mode 100644 index 7029e2a..0000000 --- a/topics/temp-trait-submission.gmi +++ /dev/null @@ -1,11 +0,0 @@ -# How to Submit a Temp trait (for testing purposes or otherwise) - -1. Click Submit Trait under the Intro dropdown in the header bar - -2. Copy select the species and group you want to submit for from the dropdowns under Step 1 (I just use Mouse/BXD for testing) - -3. Navigate to the following GN1 link to get a sample list of trait values (so in this case navigate down to BXD) - https://gn1.genenetwork.org/RIsample.html - -4. Copy and paste those values into the Step 2 text area - -5. Click Submit Trait (which should then take you to a trait page with the submitted sample values) diff --git a/topics/use-exceptions-to-indicate-errors.gmi b/topics/use-exceptions-to-indicate-errors.gmi deleted file mode 100644 index e302dd3..0000000 --- a/topics/use-exceptions-to-indicate-errors.gmi +++ /dev/null @@ -1,16 +0,0 @@ -# Use exceptions to indicate errors - -Often, we indicate that a function has encountered an error by returning a None value. Here's why this is a bad idea and why you should use exceptions instead. - -When we return None values to indicate errors, we have to take care to check the return value of every function call and propagate errors higher and higher up the function call stack until we reach a point where the error is handled. This clutters up the code, and is one reason why writing correct code in languages like C that don't have exceptions is a pain. - -With exceptions, we only have to create an exception handler (try/except block in Python) at the highest level. Any exception raised by functions below that level are automatically passed on to the except block with no additional programmer effort. - -Here's an example where we run mapping, and if there's an error, we return an error page. 
Else, we return the results page. Notice that we do not check the return value template_vars. -``` -try: - template_vars = run_mapping.RunMapping(start_vars, temp_uuid) - return render_template("mapping_results.html", **template_vars) -except: - return render_template("mapping_error.html") -``` diff --git a/topics/useful-shell-scripts.gmi b/topics/useful-shell-scripts.gmi deleted file mode 100644 index 908928e..0000000 --- a/topics/useful-shell-scripts.gmi +++ /dev/null @@ -1,52 +0,0 @@ -# Useful Shell Scripts - -I make it a habit to put all my user-scripts in `~/bin/' and add that to my `$PATH'. Another useful tip is to prepend all my scripts using a "," - this allows me to quickly take advantage of zsh's autocomplete. For the curious, you could also adopt quiuy as part of the scripts - it has the advantage of adding more semantic meaning to your scripts. - -Most of these scripts were borrowed from: - -=> https://git.sr.ht/~whereiseveryone/dot/tree/master/item/bin - -Here are the scripts that make me more efficient on the terminal: - -* Run a script in the context of a directory: ",run-with-dir" - -``` -#!/bin/sh -set -eo pipefail - -# Run a command in specific directory -run_within_dir() { - target_dir="$1" - previous_dir=$(pwd) - shift - cd $target_dir && "$@" - cd $previous_dir -} - -run_within_dir $@ -``` - -If you are in `$HOME', you can do something like: "run-within-dir /tmp" - -* Choose a guix profile on the fly: ",choose-profile": - -``` -#!/bin/env sh - -# To run this use source! - -GUIX_PROFILE="$(guix package --list-profiles | fzf --multi)" - -export GUIX_PROFILE -. "$GUIX_PROFILE/etc/profile" -``` - -* Run magit from any terminal: ",magit". You can take of fzf's autocomplete - -``` -#!/bin/env sh - -emacsclient --eval "(projectile-vc \"$PWD/$@\")" -``` - -Should these scripts become too many, a repository will be created and a link added to point there. 
diff --git a/topics/uthsc-vpn-with-free-software.gmi b/topics/uthsc-vpn-with-free-software.gmi deleted file mode 100644 index 651fb83..0000000 --- a/topics/uthsc-vpn-with-free-software.gmi +++ /dev/null @@ -1,60 +0,0 @@ -# UTHSC VPN with free software - -It is possible to connect to the UTHSC VPN using only free software. For this, you need the openconnect-sso package. openconnect-sso is a wrapper around openconnect that handles the web-based single sign-on and runs openconnect with the right arguments. -=> https://github.com/vlaci/openconnect-sso/ openconnect-sso -=> https://www.infradead.org/openconnect/ openconnect - -To connect, run openconnect-sso as follows. A browser window will pop up for you to complete the Duo authentication. Once done, you will be connected to the VPN. -``` -$ openconnect-sso --server uthscvpn1.uthsc.edu --authgroup UTHSC -``` -Note that openconnect-sso should be run as a regular user, not as root. After passing Duo authentication, openconnect-sso will try to gain root priviliges to set up the network routes. At that point, it will prompt you for your password using sudo. - -## Avoid tunneling all your network traffic through the VPN (aka Split Tunneling) - -openconnect, by default, tunnels all your traffic through the VPN. This is not good for your privacy. It is better to tunnel only the traffic destined to the specific hosts that you want to access. This can be done using the vpn-slice script. -=> https://github.com/dlenski/vpn-slice/ vpn-slice - -For example, to connect to the UTHSC VPN but only access the hosts tux01 and tux02e through the VPN, run the following command. -``` -$ openconnect-sso --server uthscvpn1.uthsc.edu --authgroup UTHSC -- --script 'vpn-slice tux01 tux02e' -``` -The vpn-slice script looks up the hostnames tux01 and tux02e on the VPN DNS and adds /etc/hosts entries and routes to your system. vpn-slice can also set up more complicated routes. To learn more, read the vpn-slice documentation. 
- -## Unsafe legacy TLS renegotiation - -The UTHSC VPN still requires unsafe legacy TLS renegotiation. This is disabled by default on the latest Guix. We need to re-enable it by configuring openssl.cnf as described on the following stackoverflow page. -=> https://stackoverflow.com/questions/71603314/ssl-error-unsafe-legacy-renegotiation-disabled -Here's a quick summary. Put the following in some file, say /tmp/openssl.cnf -``` -openssl_conf = openssl_init - -[openssl_init] -ssl_conf = ssl_sect - -[ssl_sect] -system_default = system_default_sect - -[system_default_sect] -Options = UnsafeLegacyRenegotiation -``` -Set the environment variable OPENSSL_CONF to point to this file. -``` -export OPENSSL_CONF=/tmp/openssl.cnf -``` -Then, run the openconnect-sso client as usual. - -## Putting it all together using Guix G-expressions - -Remembering to do all these steps is a hassle. Writing a shell script to automate this is a good idea, but why write shell scripts when we have G-expressions! Here's a G-expression script that I prepared earlier. -=> uthsc-vpn.scm -Download it, tweak the %hosts variable to specify the hosts you are interested in, and run it like so: -``` -$(guix build -f uthsc-vpn.scm) -``` - -## Acknowledgement - -Many thanks to Pjotr Prins and Erik Garrison without whose earlier work this guide would not be possible. -=> https://github.com/pjotrp/linux-at-university-of-tennessee -=> https://github.com/ekg/openconnect-sso-docker diff --git a/topics/uthsc-vpn.scm b/topics/uthsc-vpn.scm deleted file mode 100644 index c714731..0000000 --- a/topics/uthsc-vpn.scm +++ /dev/null @@ -1,44 +0,0 @@ -(use-modules ((gnu packages guile-xyz) #:select (guile-ini guile-lib guile-smc)) - ((gnu packages vpn) #:select (openconnect-sso vpn-slice)) - (guix gexp)) - -;; Put in the hosts you are interested in here. 
-(define %hosts - (list "octopus01" - "tux01.genenetwork.org")) - -(define (ini-file name scm) - "Return a file-like object representing INI file with @var{name} and -@var{scm} data." - (computed-file name - (with-extensions (list guile-ini guile-lib guile-smc) - #~(begin - (use-modules (srfi srfi-26) - (ini)) - - (call-with-output-file #$output - (cut scm->ini #$scm #:port <>)))))) - -(define uthsc-vpn - (with-imported-modules '((guix build utils)) - #~(begin - (use-modules (guix build utils)) - - (setenv "OPENSSL_CONF" - #$(ini-file "openssl.cnf" - #~'((#f - ("openssl_conf" . "openssl_init")) - ("openssl_init" - ("ssl_conf" . "ssl_sect")) - ("ssl_sect" - ("system_default" . "system_default_sect")) - ("system_default_sect" - ("Options" . "UnsafeLegacyRenegotiation"))))) - (invoke #$(file-append openconnect-sso "/bin/openconnect-sso") - "--server" "uthscvpn1.uthsc.edu" - "--authgroup" "UTHSC" - "--" - "--script" (string-join (cons #$(file-append vpn-slice "/bin/vpn-slice") - '#$%hosts)))))) - -(program-file "uthsc-vpn" uthsc-vpn) -- cgit v1.2.3