From 93eea6803c4206a1cdc7956413df746de60583ee Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 17 Aug 2022 14:29:12 +0300
Subject: Topic: Queries (On preventing SQL Injection).

---
 .../queries-and-prepared-statements-in-python.gmi  | 87 ++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 topics/queries-and-prepared-statements-in-python.gmi
diff --git a/topics/queries-and-prepared-statements-in-python.gmi b/topics/queries-and-prepared-statements-in-python.gmi
new file mode 100644
index 0000000..ca6510e
--- /dev/null
+++ b/topics/queries-and-prepared-statements-in-python.gmi
@@ -0,0 +1,87 @@
+# Queries and Prepared Statements in Python
+
+String interpolation when writing queries is a really bad idea; it leads to exposure to SQL Injection attacks. To mitigate this, we need to write queries using placeholders for values, and then pass in the values as arguments to the **execute** function.
+
+As a demonstration, using some existing code, do not write a query like this:
+
+> curr.execute(
+>     """
+>         SELECT Strain.Name, Strain.Id FROM Strain, Species
+>         WHERE Strain.Name IN {}
+>         and Strain.SpeciesId=Species.Id
+>         and Species.name = '{}'
+>     """.format(
+>         create_in_clause(list(sample_data.keys())),
+>         *mescape(dataset.group.species)))
+
+In the query above, we interpolate the values of the 'sample_data.keys()' values and that of the 'dataset.group.species' values.
+
+The code above can be rewritten to something like:
+
+> sample_data_keys = tuple(key for key in sample_data.keys())
+>
+> curr.execute(
+>     """
+>         SELECT Strain.Name, Strain.Id FROM Strain, Species
+>         WHERE Strain.Name IN ({})
+>         and Strain.SpeciesId=Species.Id
+>         and Species.name = %s
+>     """.format(", ".join(sample_data_keys)),
+>     (sample_data_keys + (dataset.group.species,)))
+
+In this new query, the IN clause ends up being a string of the form
+
+> %s, %s, %s, ...
+
+for the total number of items in the 'sample_data_keys' tuple.
+
+There is one more '%s' placeholder for the 'Species.name' value, so, the final tuple we provide as an argument to execute needs to add the 'dataset.group.species' value.
+
+**IMPORTANT 01**: the total number of placeholders (%s) must be the same as the total number of arguments passed into the 'execute' function.
+
+**IMPORTANT 02**: the order of the values must correspond to the order of the placeholders.
+
+### Aside
+
+The functions 'create_in_clause' and 'mescape' are defined as below:
+
+> from MySQLdb import escape_string as escape_
+> 
+> def create_in_clause(items):
+>     """Create an in clause for mysql"""
+>     in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
+>     in_clause = '( {} )'.format(in_clause)
+>     return in_clause
+> 
+> def mescape(*items):
+>     """Multiple escape"""
+>     return [escape_(str(item)).decode('utf8') for item in items]
+>
+> def escape(string_):
+>     return escape_(string_).decode('utf8')
+
+
+## Parameter Style
+
+In the section above, we show the most common parameter style used in most cases.
+
+If you want to use a mapping object (dict), you have the option of using the '%(<text>)s' format for the query. In that case, we could rewrite the query above into something like:
+
+> sample_data_dict = {f"sample_{idx}: key for idx,key in enumerate(sample_data.keys())}
+>
+> curr.execute(
+>     """
+>         SELECT Strain.Name, Strain.Id FROM Strain, Species
+>         WHERE Strain.Name IN ({})
+>         and Strain.SpeciesId=Species.Id
+>         and Species.name = %(species_name)s
+>     """.format(", ".join([f"%({key})s" for key in sample_data_dict.keys()])),
+>     {**sample_data_dict, "species_name": dataset.group.species})
+
+## Final Note
+
+While this has dealt mostly with the MySQLdb driver for Python3, the idea is the same for the psycopg2 (PostgreSQL) driver and others (with some minor variation in the details).
+
+The concept is also similar in many other languages.
+
+The main takeaway is that you really should not be manually escaping the values - instead, you should let the driver do that for you, by providing placeholders in the query, and the values to use separately.
-- 
cgit v1.2.3


From 5cca46a2eed70cb440aa88bc29b0e321794f70c8 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 17 Aug 2022 15:55:35 +0300
Subject: Use pre-formatted text blocks for source code

---
 .../queries-and-prepared-statements-in-python.gmi  | 94 ++++++++++++----------
 1 file changed, 51 insertions(+), 43 deletions(-)

diff --git a/topics/queries-and-prepared-statements-in-python.gmi b/topics/queries-and-prepared-statements-in-python.gmi
index ca6510e..642ed96 100644
--- a/topics/queries-and-prepared-statements-in-python.gmi
+++ b/topics/queries-and-prepared-statements-in-python.gmi
@@ -4,30 +4,34 @@ String interpolation when writing queries is a really bad idea; it leads to expo
 
 As a demonstration, using some existing code, do not write a query like this:
 
-> curr.execute(
->     """
->         SELECT Strain.Name, Strain.Id FROM Strain, Species
->         WHERE Strain.Name IN {}
->         and Strain.SpeciesId=Species.Id
->         and Species.name = '{}'
->     """.format(
->         create_in_clause(list(sample_data.keys())),
->         *mescape(dataset.group.species)))
+```
+curr.execute(
+    """
+        SELECT Strain.Name, Strain.Id FROM Strain, Species
+        WHERE Strain.Name IN {}
+        and Strain.SpeciesId=Species.Id
+        and Species.name = '{}'
+    """.format(
+        create_in_clause(list(sample_data.keys())),
+        *mescape(dataset.group.species)))
+```
 
 In the query above, we interpolate the values of the 'sample_data.keys()' values and that of the 'dataset.group.species' values.
 
 The code above can be rewritten to something like:
 
-> sample_data_keys = tuple(key for key in sample_data.keys())
->
-> curr.execute(
->     """
->         SELECT Strain.Name, Strain.Id FROM Strain, Species
->         WHERE Strain.Name IN ({})
->         and Strain.SpeciesId=Species.Id
->         and Species.name = %s
->     """.format(", ".join(sample_data_keys)),
->     (sample_data_keys + (dataset.group.species,)))
+```
+sample_data_keys = tuple(key for key in sample_data.keys())
+
+curr.execute(
+    """
+        SELECT Strain.Name, Strain.Id FROM Strain, Species
+        WHERE Strain.Name IN ({})
+        and Strain.SpeciesId=Species.Id
+        and Species.name = %s
+    """.format(", ".join(["%s"] * len(sample_data_keys))),
+    (sample_data_keys + (dataset.group.species,)))
+```
 
 In this new query, the IN clause ends up being a string of the form
 
@@ -45,20 +49,22 @@ There is one more '%s' placeholder for the 'Species.name' value, so, the final t
 
 The functions 'create_in_clause' and 'mescape' are defined as below:
 
-> from MySQLdb import escape_string as escape_
-> 
-> def create_in_clause(items):
->     """Create an in clause for mysql"""
->     in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
->     in_clause = '( {} )'.format(in_clause)
->     return in_clause
-> 
-> def mescape(*items):
->     """Multiple escape"""
->     return [escape_(str(item)).decode('utf8') for item in items]
->
-> def escape(string_):
->     return escape_(string_).decode('utf8')
+```
+from MySQLdb import escape_string as escape_
+
+def create_in_clause(items):
+    """Create an in clause for mysql"""
+    in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
+    in_clause = '( {} )'.format(in_clause)
+    return in_clause
+
+def mescape(*items):
+    """Multiple escape"""
+    return [escape_(str(item)).decode('utf8') for item in items]
+
+def escape(string_):
+    return escape_(string_).decode('utf8')
+```
 
 
 ## Parameter Style
@@ -67,16 +73,18 @@ In the section above, we show the most common parameter style used in most cases
 
 If you want to use a mapping object (dict), you have the option of using the '%(<text>)s' format for the query. In that case, we could rewrite the query above into something like:
 
-> sample_data_dict = {f"sample_{idx}: key for idx,key in enumerate(sample_data.keys())}
->
-> curr.execute(
->     """
->         SELECT Strain.Name, Strain.Id FROM Strain, Species
->         WHERE Strain.Name IN ({})
->         and Strain.SpeciesId=Species.Id
->         and Species.name = %(species_name)s
->     """.format(", ".join([f"%({key})s" for key in sample_data_dict.keys()])),
->     {**sample_data_dict, "species_name": dataset.group.species})
+```
+sample_data_dict = {f"sample_{idx}": key for idx, key in enumerate(sample_data.keys())}
+
+curr.execute(
+    """
+        SELECT Strain.Name, Strain.Id FROM Strain, Species
+        WHERE Strain.Name IN ({})
+        and Strain.SpeciesId=Species.Id
+        and Species.name = %(species_name)s
+    """.format(", ".join([f"%({key})s" for key in sample_data_dict.keys()])),
+    {**sample_data_dict, "species_name": dataset.group.species})
+```
 
 ## Final Note
 
-- 
cgit v1.2.3


From bf2bb362b7127b9580ab2ad2a976747491dde850 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Fri, 19 Aug 2022 07:20:42 +0300
Subject: Documentation: Setting up local mariadb server for development.

---
 topics/setting-up-local-development-database.gmi | 76 ++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 topics/setting-up-local-development-database.gmi

diff --git a/topics/setting-up-local-development-database.gmi b/topics/setting-up-local-development-database.gmi
new file mode 100644
index 0000000..ef69326
--- /dev/null
+++ b/topics/setting-up-local-development-database.gmi
@@ -0,0 +1,76 @@
+# Setting up Local Development Database
+
+## Introduction
+
+You need to setup a quick local database for development without needing root permissions and polluting your environment.
+
+* ${HOME} is the path to your home directory
+* An assumption is made that the GeneNetwork2 profile is in ${HOME}/opt/gn_profiles/gn2_latest for the purposes of this documentation. Please replace as appropriate.
+* We install the database files under ${HOME}/genenetwork/mariadb. Change as appropriate.
+
+## Steps
+
+Step 01: Setup directories
+
+```
+mkdir -pv ${HOME}/genenetwork/mariadb/var/run
+mkdir -pv ${HOME}/genenetwork/mariadb/var/lib/data
+mkdir -pv ${HOME}/genenetwork/mariadb/var/lib/mysql
+```
+
+Setup default my.cnf
+
+```
+cat <<EOF > ${HOME}/genenetwork/mariadb/my.cnf
+[client-server]
+socket=${HOME}/genenetwork/mariadb/var/run/mysqld/mysqld.sock
+port=3307
+
+[server]
+user=$(whoami)
+socket=${HOME}/genenetwork/mariadb/var/run/mysqld/mysqld.sock
+basedir=${HOME}/opt/gn_profiles/gn2_latest
+datadir=${HOME}/genenetwork/mariadb/var/lib/data
+ft_min_word_len=3
+EOF
+```
+
+Install the database
+
+```
+${HOME}/opt/gn_profiles/gn2_latest/bin/mysql_install_db \
+    --defaults-file=${HOME}/genenetwork/mariadb/my.cnf
+```
+
+Running the daemon:
+
+```
+${HOME}/opt/gn_profiles/gn2_latest/bin/mysqld_safe \
+    --defaults-file=${HOME}/genenetwork/mariadb/my.cnf
+```
+
+Connect to daemon
+
+```
+${HOME}/opt/gn_profiles/gn2_latest/bin/mysql \
+    --defaults-file=${HOME}/genenetwork/mariadb/my.cnf
+```
+
+Set up password for user
+
+```
+MariaDB [(none)]> USE mysql;
+MariaDB [mysql]> ALTER USER '<your-username>'@'localhost' IDENTIFIED BY '<the-new-password>';
+MariaDB [mysql]> FLUSH PRIVILEGES;
+```
+
+Now logout and login again with
+
+```
+$ ${HOME}/opt/gn_profiles/gn2_latest/bin/mysql \
+    --defaults-file=${HOME}/genenetwork/mariadb/my.cnf --password mysql
+```
+
+enter the newly set password and voila, you are logged in and your user has the password set up.
+
+Continue to setup other databases as appropriate.
-- 
cgit v1.2.3


From 76831a33264b0cb6bff6f39ccfeb3721ecd61bfb Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Fri, 19 Aug 2022 07:36:41 +0300
Subject: Documentation: Setup new user, and their default database.

---
 topics/setting-up-local-development-database.gmi | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/topics/setting-up-local-development-database.gmi b/topics/setting-up-local-development-database.gmi
index ef69326..c2856a8 100644
--- a/topics/setting-up-local-development-database.gmi
+++ b/topics/setting-up-local-development-database.gmi
@@ -10,7 +10,7 @@ You need to setup a quick local database for development without needing root pe
 
 ## Steps
 
-Step 01: Setup directories
+Setup directories
 
 ```
 mkdir -pv ${HOME}/genenetwork/mariadb/var/run
@@ -73,4 +73,20 @@ $ ${HOME}/opt/gn_profiles/gn2_latest/bin/mysql \
 
 enter the newly set password and voila, you are logged in and your user has the password set up.
 
-Continue to setup other databases as appropriate.
+Now, setup a new user, say webqtlout, and a default database they can connect to
+
+```
+MariaDB [mysql]> CREATE DATABASE webqtlout;
+MariaDB [mysql]> CREATE USER 'webqtlout'@'localhost' IDENTIFIED BY '<some-password>';
+MariaDB [mysql]> GRANT ALL PRIVILEGES ON webqtlout.* TO 'webqtlout'@'localhost';
+```
+
+Now logout, and log back in as the new webqtlout user:
+
+```
+/home/frederick/opt/gn_profiles/gn2_latest/bin/mysql \
+    --defaults-file=/home/frederick/genenetwork/mariadb/my.cnf \
+    --user=webqtlout --host=localhost --password webqtlout
+```
+
+and enter the password you provided.
-- 
cgit v1.2.3


From 8e35edfd729eb0d6c8258e2a6bf2f0aa8b26cc15 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Fri, 19 Aug 2022 08:30:09 +0300
Subject: Documentation: Setup small database

---
 topics/setting-up-local-development-database.gmi | 66 +++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/topics/setting-up-local-development-database.gmi b/topics/setting-up-local-development-database.gmi
index c2856a8..3c6c291 100644
--- a/topics/setting-up-local-development-database.gmi
+++ b/topics/setting-up-local-development-database.gmi
@@ -8,7 +8,7 @@ You need to setup a quick local database for development without needing root pe
 * An assumption is made that the GeneNetwork2 profile is in ${HOME}/opt/gn_profiles/gn2_latest for the purposes of this documentation. Please replace as appropriate.
 * We install the database files under ${HOME}/genenetwork/mariadb. Change as appropriate.
 
-## Steps
+## Setup Database Server
 
 Setup directories
 
@@ -90,3 +90,67 @@ Now logout, and log back in as the new webqtlout user:
 ```
 
 and enter the password you provided.
+
+
+## Setting up the Small Database
+
+Download the database from
+
+=> http://ipfs.genenetwork.org/ipfs/QmRUmYu6ogxEdzZeE8PuXMGCDa8M3y2uFcfo4zqQRbpxtk
+
+Say you downloaded the file in ${HOME}/Downloads, you can now add the database to your server.
+
+First stop the server:
+
+```
+$ ps aux | grep mysqld # get the process ids
+$ kill -s SIGTERM <pid-of-mysqld> <pid-of-mysqld_safe>
+```
+
+Now extract the database archive in the mysql data directory:
+
+```
+$ cd ${HOME}/genenetwork/mariadb/var/lib/data
+$ p7zip -k -d ${HOME}/Downloads/db_webqtl_s.7z
+```
+
+Now restart the server:
+
+```
+${HOME}/opt/gn_profiles/gn2_latest/bin/mysqld_safe \
+    --defaults-file=${HOME}/genenetwork/mariadb/my.cnf
+```
+
+Then update the databases
+
+```
+$ /home/frederick/opt/gn_profiles/gn2_latest/bin/mysql_upgrade \
+    --defaults-file=/home/frederick/genenetwork/mariadb/my.cnf \
+    --user=frederick --password --force
+```
+
+and login as the administrative user:
+
+```
+$ /home/frederick/opt/gn_profiles/gn2_latest/bin/mysql \
+    --defaults-file=/home/frederick/genenetwork/mariadb/my.cnf \
+    --user=$(whoami) --password
+```
+
+and grant the privileges to your normal user:
+
+```
+MariaDB [mysql]> GRANT ALL PRIVILEGES ON db_webqtl_s.* TO 'webqtlout'@'localhost';
+```
+
+now logout as the administrative user and log back in as the normal user
+
+```
+/home/frederick/opt/gn_profiles/gn2_latest/bin/mysql \
+    --defaults-file=/home/frederick/genenetwork/mariadb/my.cnf \
+    --user=webqtlout --host=localhost --password db_webqtlout_s
+
+MariaDB [db_webqtlout_s]> SELECT * FROM ProbeSetData LIMIT 20;
+```
+
+verify you see some data.
-- 
cgit v1.2.3


From 5387418a09e86f83290a26453af353e0da27531f Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 19 Aug 2022 18:34:21 +0200
Subject: Install GN2

---
 issues/systems/gn2-time-machines.gmi | 80 ++++++++++++++++++++++++++++++++----
 1 file changed, 73 insertions(+), 7 deletions(-)

diff --git a/issues/systems/gn2-time-machines.gmi b/issues/systems/gn2-time-machines.gmi
index 68ddaa9..626eff4 100644
--- a/issues/systems/gn2-time-machines.gmi
+++ b/issues/systems/gn2-time-machines.gmi
@@ -2,9 +2,7 @@
 
 GN1 time machines are pretty straightforward. With GN2 the complexity has increased a lot because of interacting services and a larger dependency graph.
 
-Here I track what it takes today to install an instance of GN2 that is 'frozen' in time.
-
-- [X] Install Mariadb and recover production DB (est. 3-4 hrs)
+Here I track what it takes today to install a fallback instance of GN2 that is 'frozen' in time.
 
 ## Tags
 
@@ -16,14 +14,14 @@ Here I track what it takes today to install an instance of GN2 that is 'frozen'
 
 ## Tasks
 
-General time line:
+Also a time line:
 
-* [X] Install machine software and physical (4 hours)
+* [X] Install machine software and physical (est. 4-8 hours)
 * [X] Sync backups on a daily basis and add monitoring (2 hours)
 * [X] Set up Mariadb and sync from backup (4 hours)
-* [ ] GN2 production environment
-* [ ] GN3 aliases server (Racket)
+* [X] GN2 production environment with nginx & genotype_files (2 hours)
 * [ ] GN3 Genenetwork3 service (Python)
+* [ ] GN3 aliases server (Racket)
 * [ ] GN3 auth proxy (Racket)
 * [ ] set up https and letsencrypt
 * [ ] setup logrotate for production log files
@@ -41,6 +39,18 @@ guix pull -p ~/opt/guix-pull
 guix package -i mariadb -p /usr/local/guix-profiles/mariadb
 ```
 
+To get to genenetwork we use a channel. The last working channel on the CI can be downloaded from https://ci.genenetwork.org/channels.scm. Now do
+
+```
+guix pull -C channels.scm -p ~/opt/guix-gn-channel
+. ~/opt/guix-gn-channel/etc/profile
+guix package -i genenetwork2 -p ~/opt/genenetwork2
+```
+
+That sets the profile to ~/opt/genenetwork2.
+
+Note that these commands may take a while. And when guix starts building lots of software it may be necessary to configure a substitute server (we use guix.genenetwork.org) by adding --substitute-urls="http://guix.genenetwork.org https://ci.guix.info".
+
 ### Mariadb (est. 1-2 hours)
 
 Set up a global Mariadb
@@ -129,3 +139,59 @@ In the process I discover that ibdata1 file has grown to 100GB. Not a problem ye
 => https://www.percona.com/blog/2013/08/20/why-is-the-ibdata1-file-continuously-growing-in-mysql/
 
 (obviously we don't want to use mysqldump right now, but I'll need to do some future work).
+
+### Setting up GN2
+
+Create a gn2 user and checkout the git repo in /home/gn2/production/gene. Note that there exists also a backup of gn2 in borg which has a 'run_production.sh' script.
+
+Running the script will give feedback
+
+```
+su gn2
+cd /home/gn2/production/
+sh run_production.sh
+```
+
+You'll find you need the Guix install of gn2. Start with the Guix section above.
+
+### Genotype files
+
+GN2 requires a set of files that is in the backup
+
+```
+borg extract borg-genenetwork::borg-ZACH-home-20220819-04:04-Fri home/zas1024/gn2-zach/genotype_files/
+```
+
+move the genotype_files and update the path in `gn2_settings.py` which is in the same dir as the run_production.sh script.
+
+### Configure Nginx
+
+You'll need to tell Nginx to forward to the web server. Something like:
+
+```
+server {
+    listen 80;
+    server_name gn2-fallback.genenetwork.org;
+
+    access_log  /var/log/nginx/gn2-danny-access.log;
+    error_log  /var/log/nginx/gn2-danny-error.log;
+
+    location / {
+            proxy_pass         http://127.0.0.1:5000/;
+            proxy_redirect     off;
+
+            proxy_set_header   Host             $host;
+            proxy_set_header   X-Real-IP        $remote_addr;
+            proxy_set_header   X-Forwarded-For  $proxy_add_x_forwarded_for;
+
+      client_max_body_size 8050m;
+      proxy_read_timeout 300;
+      proxy_connect_timeout 300;                                                                                 proxy_send_timeout 300;
+
+    }
+}
+```
+
+### Setting up GN3
+
+Without gn3 the menu will not show on the main page and you see 'There was an error retrieving and setting the menu. Try again later.'
-- 
cgit v1.2.3


From a820a04565a6822d4a6239963d164c3b0e40a259 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Sat, 20 Aug 2022 15:23:54 +0530
Subject: GEMMA should report missing genotype file.

---
 issues/gemma/report-missing-genotype-file.gmi | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 issues/gemma/report-missing-genotype-file.gmi

diff --git a/issues/gemma/report-missing-genotype-file.gmi b/issues/gemma/report-missing-genotype-file.gmi
new file mode 100644
index 0000000..a801d70
--- /dev/null
+++ b/issues/gemma/report-missing-genotype-file.gmi
@@ -0,0 +1,3 @@
+# GEMMA should report name of missing genotype file
+
+When genenetwork is unable to find a genotype file that GEMMA needs, it should report the name of the missing file in the error message. The correct way to do this is to raise a FileNotFoundError lower down close to the GEMMA call, and handle it higher up close to the web UI.
-- 
cgit v1.2.3


From 0fde34533b01168c5de81fa63f8b08ad79c10da6 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 21 Aug 2022 10:14:25 +0200
Subject: Install GN2

---
 issues/systems/gn2-time-machines.gmi | 107 +++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/issues/systems/gn2-time-machines.gmi b/issues/systems/gn2-time-machines.gmi
index 626eff4..513a91a 100644
--- a/issues/systems/gn2-time-machines.gmi
+++ b/issues/systems/gn2-time-machines.gmi
@@ -195,3 +195,110 @@ server {
 ### Setting up GN3
 
 Without gn3 the menu will not show on the main page and you see 'There was an error retrieving and setting the menu. Try again later.'
+
+GN3 is a separate REST server that has its own dependencies. A bit confusingly it is also a Python module dependency for GN2. So we need to set up both 'routes'.
+
+First checkout the genenetwork3 repo as gn2 user
+
+```
+su gn2
+cd /home/gn2
+mkdir -p gn3_production
+cd gn3_production
+git clone https://github.com/genenetwork/genenetwork3.git
+```
+
+Check the genenetwork3 README for latest instructions on starting the service as a Guix container. Typically
+
+```
+guix shell -C --network --expose=$HOME/production/genotype_files/ -Df guix.scm
+```
+
+where genotype_files is the dir you installed earlier.
+
+Run it with, for example
+
+```
+export FLASK_APP="main.py"
+flask run --port=8081
+```
+
+I.e., the same port as GN2 expects in gn2_settings.py. Test with
+
+```
+curl localhost:8081/api/version
+"1.0"
+```
+
+Next set up the external API with nginx by adding the following path to above definition:
+
+```
+    location /gn3 {
+            rewrite /gn3/(.*) /$1  break;
+            proxy_pass         http://127.0.0.1:8081/;
+            proxy_redirect     off;
+            proxy_set_header   Host $host;
+    }
+```
+
+and if DNS is correct you should get
+
+```
+curl gn2-fallback.genenetwork.org/gn3/api/version
+"1.0"
+```
+
+To generate the main menu the server does a request to
+$.ajax(gn_server_url + 'api/menu/generate/json'). On production that is
+https://genenetwork.org/api3/api/menu/generate/json which is actually gn3(!)
+
+```
+curl http://gn2-fallback.genenetwork.org/gn3/api/menu/generate/json
+```
+
+If this gives an error check the gn3 output log.
+
+Perhaps obviously, on a production server GN3 should be running as a proper service.
+
+### Alias service
+
+There is another GN3 service that resolves wikidata Gene aliases
+
+```
+su gn2
+cd ~/gn3_production
+git clone https://github.com/genenetwork/gn3.git
+```
+
+follow the instructions in the README and you should get
+
+```
+curl localhost:8000/gene/aliases/Shh
+["Hx","ShhNC","9530036O11Rik","Dsh","Hhg1","Hxl3","M100081","ShhNC"]
+```
+
+### Authentication proxy
+
+The proxy also needs to run.
+
+```
+su gn2
+cd ~/gn3_production
+git clone https://github.com/genenetwork/gn-proxy.git
+```
+
+See README
+
+### Troubleshooting
+
+Check the server log for errors from the server. There should be one in /home/gn2/production/tmp/. For example you may see
+
+```
+ERROR:wqflask:404: Not Found:  7:20AM UTC Aug 20, 2022: http://gn2-fallback.genenetwork.org/api/api/menu/generate/json
+```
+
+pointing out the setting in gn2_settings.py is wrong.
+
+Use the console bar of the browser to see what JS error you get.
+
+If you get CORS errors it is because you are using a server that is not genenetwork.org and this is usually a configuration issue.
-- 
cgit v1.2.3


From 7cb0678f263326983a24b97d366d6b2ef67ce58b Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 22 Aug 2022 07:38:57 +0300
Subject: Use ${HOME} for documentation.

---
 topics/setting-up-local-development-database.gmi | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/topics/setting-up-local-development-database.gmi b/topics/setting-up-local-development-database.gmi
index 3c6c291..9d55d33 100644
--- a/topics/setting-up-local-development-database.gmi
+++ b/topics/setting-up-local-development-database.gmi
@@ -84,8 +84,8 @@ MariaDB [mysql]> GRANT ALL PRIVILEGES ON webqtlout.* TO 'webqtlout'@'localhost';
 Now logout, and log back in as the new webqtlout user:
 
 ```
-/home/frederick/opt/gn_profiles/gn2_latest/bin/mysql \
-    --defaults-file=/home/frederick/genenetwork/mariadb/my.cnf \
+${HOME}/opt/gn_profiles/gn2_latest/bin/mysql \
+    --defaults-file=${HOME}/genenetwork/mariadb/my.cnf \
     --user=webqtlout --host=localhost --password webqtlout
 ```
 
@@ -124,16 +124,16 @@ ${HOME}/opt/gn_profiles/gn2_latest/bin/mysqld_safe \
 Then update the databases
 
 ```
-$ /home/frederick/opt/gn_profiles/gn2_latest/bin/mysql_upgrade \
-    --defaults-file=/home/frederick/genenetwork/mariadb/my.cnf \
+$ ${HOME}/opt/gn_profiles/gn2_latest/bin/mysql_upgrade \
+    --defaults-file=${HOME}/genenetwork/mariadb/my.cnf \
     --user=frederick --password --force
 ```
 
 and login as the administrative user:
 
 ```
-$ /home/frederick/opt/gn_profiles/gn2_latest/bin/mysql \
-    --defaults-file=/home/frederick/genenetwork/mariadb/my.cnf \
+$ ${HOME}/opt/gn_profiles/gn2_latest/bin/mysql \
+    --defaults-file=${HOME}/genenetwork/mariadb/my.cnf \
     --user=$(whoami) --password
 ```
 
@@ -146,8 +146,8 @@ MariaDB [mysql]> GRANT ALL PRIVILEGES ON db_webqtl_s.* TO 'webqtlout'@'localhost
 now logout as the administrative user and log back in as the normal user
 
 ```
-/home/frederick/opt/gn_profiles/gn2_latest/bin/mysql \
-    --defaults-file=/home/frederick/genenetwork/mariadb/my.cnf \
+${HOME}/opt/gn_profiles/gn2_latest/bin/mysql \
+    --defaults-file=${HOME}/genenetwork/mariadb/my.cnf \
     --user=webqtlout --host=localhost --password db_webqtlout_s
 
 MariaDB [db_webqtlout_s]> SELECT * FROM ProbeSetData LIMIT 20;
-- 
cgit v1.2.3


From 7e4b91fd0a314f90b21c2b62c32717949716ac49 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 22 Aug 2022 07:39:23 +0300
Subject: Add notes on connecting via TCP ports, rather than Unix Sockets

---
 topics/setting-up-local-development-database.gmi | 30 ++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/topics/setting-up-local-development-database.gmi b/topics/setting-up-local-development-database.gmi
index 9d55d33..ef2d31e 100644
--- a/topics/setting-up-local-development-database.gmi
+++ b/topics/setting-up-local-development-database.gmi
@@ -154,3 +154,33 @@ MariaDB [db_webqtlout_s]> SELECT * FROM ProbeSetData LIMIT 20;
 ```
 
 verify you see some data.
+
+### A Note on Connection to the Server
+
+So far, we have been connecting to the server by specifying --defaults-file option, e.g.
+
+```
+${HOME}/opt/gn_profiles/gn2_latest/bin/mysql \
+    --defaults-file=${HOME}/genenetwork/mariadb/my.cnf \
+    --user=webqtlout --host=localhost --password db_webqtl_s
+```
+
+which allows connection via the unix socket.
+
+We could drop that specification and connect via the port with:
+
+```
+${HOME}/opt/gn_profiles/gn2_latest/bin/mysql \
+    --user=webqtlout --host=127.0.0.1 --port=3307 --password db_webqtl_s
+```
+
+In this version, the host specification was changed from
+```
+--host=localhost
+```
+to
+```
+--host=127.0.0.1
+```
+
+^^^whereas, the --defaults-file file specification was dropped and a new --port specification was added.
-- 
cgit v1.2.3


From 62d21d2d4c48fe1bb40c8c00d545751596274747 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 22 Aug 2022 07:42:04 +0300
Subject: Add some emphasis

---
 topics/setting-up-local-development-database.gmi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/topics/setting-up-local-development-database.gmi b/topics/setting-up-local-development-database.gmi
index ef2d31e..67dd88d 100644
--- a/topics/setting-up-local-development-database.gmi
+++ b/topics/setting-up-local-development-database.gmi
@@ -183,4 +183,4 @@ to
 --host=127.0.0.1
 ```
 
-^^^whereas, the --defaults-file file specification was dropped and a new --port specification was added.
+In addition, the **--defaults-file** specification was dropped and a new **--port** specification was added.
-- 
cgit v1.2.3


From f46f177a2b9c063125a71fdf0a0a4c221f05d41d Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Mon, 22 Aug 2022 22:31:37 +0530
Subject: Remove .envrc.

.envrc was added as a convenience for when gnbug lived in this
repository.

* .envrc: Delete file.
---
 .envrc | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 .envrc

diff --git a/.envrc b/.envrc
deleted file mode 100644
index d9ac5ca..0000000
--- a/.envrc
+++ /dev/null
@@ -1 +0,0 @@
-PATH_add .
\ No newline at end of file
-- 
cgit v1.2.3


From 195ffe2217e9a3b67d08255718f21097b73a49d8 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Mon, 22 Aug 2022 22:48:51 +0530
Subject: Document connecting to UTHSC VPN.

---
 topics/uthsc-vpn-with-free-software.gmi | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 topics/uthsc-vpn-with-free-software.gmi

diff --git a/topics/uthsc-vpn-with-free-software.gmi b/topics/uthsc-vpn-with-free-software.gmi
new file mode 100644
index 0000000..f7f9fe0
--- /dev/null
+++ b/topics/uthsc-vpn-with-free-software.gmi
@@ -0,0 +1,27 @@
+# UTHSC VPN with free software
+
+It is possible to connect to the UTHSC VPN using only free software. For this, you need the openconnect-sso package. openconnect-sso is a wrapper around openconnect that handles the web-based single sign-on and runs openconnect with the right arguments.
+=> https://github.com/vlaci/openconnect-sso/ openconnect-sso
+=> https://www.infradead.org/openconnect/ openconnect
+
+To connect, run openconnect-sso as follows and enter your password when prompted. A browser window will pop up for you to complete the Duo authentication. Once done, you will be connected to the VPN.
+```
+$ openconnect-sso --server uthscvpn1.uthsc.edu --user your-netid --authgroup uthsc
+```
+
+## Avoid tunneling all your network traffic through the VPN (aka Split Tunneling)
+
+openconnect, by default, tunnels all your traffic through the VPN. This is not good for your privacy. It is better to tunnel only the traffic destined to the specific hosts that you want to access. This can be done using the vpn-slice script.
+=> https://github.com/dlenski/vpn-slice/ vpn-slice
+
+For example, to connect to the UTHSC VPN but only access the hosts tux01 and tux02e through the VPN, run the following command.
+```
+$ openconnect-sso --server uthscvpn1.uthsc.edu --user your-netid --authgroup uthsc -- --script 'vpn-slice tux01 tux02e'
+```
+The vpn-slice script looks up the hostnames tux01 and tux02e on the VPN DNS and adds /etc/hosts entries and routes to your system. vpn-slice can also set up more complicated routes. To learn more, read the vpn-slice documentation.
+
+## Acknowledgement
+
+Many thanks to Pjotr Prins and Erik Garrison without whose earlier work this guide would not be possible.
+=> https://github.com/pjotrp/linux-at-university-of-tennessee
+=> https://github.com/ekg/openconnect-sso-docker
-- 
cgit v1.2.3


From d7d435b4fca7bdace548a3a0b0addcb14dcc281b Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Tue, 23 Aug 2022 13:17:09 +0530
Subject: Close Mechanical Rob issue.

---
 issues/resurrect-mechanical-rob.gmi | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/issues/resurrect-mechanical-rob.gmi b/issues/resurrect-mechanical-rob.gmi
index bea2a78..d456864 100644
--- a/issues/resurrect-mechanical-rob.gmi
+++ b/issues/resurrect-mechanical-rob.gmi
@@ -9,3 +9,11 @@ We need to run Mechanical Rob tests as part of our continuous integration tests.
 * status: in progress
 * type: enhancement
 * priority: medium
+
+## Resolution
+
+The Mechanical Rob CI tests are functioning again now. To see how to run Mechanical Rob, see the CI job definition in the genenetwork-machines repo.
+=> https://git.genenetwork.org/arunisaac/genenetwork-machines/src/branch/main/genenetwork-development.scm
+The invocation procedure is bound to change as the many environment variables in genenetwork2 are cleared up.
+
+* closed
-- 
cgit v1.2.3


From 103ba68067ec44873f644bad8591a74023fe764f Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Tue, 23 Aug 2022 13:33:26 +0530
Subject: Add second production on tux02 issue.

---
 issues/systems/second-production-tux02.gmi | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 issues/systems/second-production-tux02.gmi

diff --git a/issues/systems/second-production-tux02.gmi b/issues/systems/second-production-tux02.gmi
new file mode 100644
index 0000000..161629a
--- /dev/null
+++ b/issues/systems/second-production-tux02.gmi
@@ -0,0 +1,9 @@
+# Second production on tux02
+
+* assigned: aruni
+
+Set up a second production system on tux02. This will be fully configured using Guix and will be able to roll back to previous states easily. The Guix configuration of this system should go into the genenetwork-machines repo.
+=> https://git.genenetwork.org/arunisaac/genenetwork-machines genenetwork-machines repo
+
+This issue likely obsoletes
+=> /issues/systems/tux02-production
-- 
cgit v1.2.3


From ddd3d8782536eb2ff020c82590c75a48c4943233 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Tue, 23 Aug 2022 16:06:11 +0530
Subject: Capitalize authgroup openconnect-sso argument.

---
 topics/uthsc-vpn-with-free-software.gmi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/topics/uthsc-vpn-with-free-software.gmi b/topics/uthsc-vpn-with-free-software.gmi
index f7f9fe0..05f389b 100644
--- a/topics/uthsc-vpn-with-free-software.gmi
+++ b/topics/uthsc-vpn-with-free-software.gmi
@@ -6,7 +6,7 @@ It is possible to connect to the UTHSC VPN using only free software. For this, y
 
 To connect, run openconnect-sso as follows and enter your password when prompted. A browser window will pop up for you to complete the Duo authentication. Once done, you will be connected to the VPN.
 ```
-$ openconnect-sso --server uthscvpn1.uthsc.edu --user your-netid --authgroup uthsc
+$ openconnect-sso --server uthscvpn1.uthsc.edu --user your-netid --authgroup UTHSC
 ```
 
 ## Avoid tunneling all your network traffic through the VPN (aka Split Tunneling)
@@ -16,7 +16,7 @@ openconnect, by default, tunnels all your traffic through the VPN. This is not g
 
 For example, to connect to the UTHSC VPN but only access the hosts tux01 and tux02e through the VPN, run the following command.
 ```
-$ openconnect-sso --server uthscvpn1.uthsc.edu --user your-netid --authgroup uthsc -- --script 'vpn-slice tux01 tux02e'
+$ openconnect-sso --server uthscvpn1.uthsc.edu --user your-netid --authgroup UTHSC -- --script 'vpn-slice tux01 tux02e'
 ```
 The vpn-slice script looks up the hostnames tux01 and tux02e on the VPN DNS and adds /etc/hosts entries and routes to your system. vpn-slice can also set up more complicated routes. To learn more, read the vpn-slice documentation.
 
-- 
cgit v1.2.3


From 4a59004bad69ea1bd2cadcf1e2c3446ccbd85be8 Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Tue, 23 Aug 2022 21:37:49 +0300
Subject: Add a design-doc for better logging

---
 topics/better-logging.gmi | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 topics/better-logging.gmi

diff --git a/topics/better-logging.gmi b/topics/better-logging.gmi
new file mode 100644
index 0000000..4c216aa
--- /dev/null
+++ b/topics/better-logging.gmi
@@ -0,0 +1,38 @@
+# Improving Logging in GN2
+
+## What Are We Trying To Solve?
+
+We prioritise maintaining user functionality over speed in GN [with time this speed will be improved].  As such we should pay more attention to not breaking any currently working GN2 functionality.  And when/if we do, trouble-shooting should be easy.  On this front, one way is to stream-line logging in both GN2/GN3 and make it more script friendly - only report when something fails, not to instrument variables - and in so doing make the process of monitoring easier.
+
+## Goals
+
+- Remove noise from GN2.
+
+- Separate logging into different files: error logs, info logs.  Add this somewhere with Flask itself instead of re-directing STDOUT to a file.
+
+### Non-goals
+
+- Logging in GN3.
+
+- Parsing logs to extract goals.
+
+- Getting rid of "gn.db" global object and in so doing removing "MySqlAlchemy" [that we really shouldn't be using].
+
+- Adding log messages to existing functions.
+
+## Actual Design
+
+- Configure logger to separate logs into different files:
+  - INFO file - contains initial bootstrap messages and instrumentation we may have to do.
+  - ERROR file - contains real errors.
+
+- Have those settings - the different log files - be part of GN2 start-up script.
+
+- For error messages, use the following format for different error messages:
+  - "DATABASE: <message>"
+  - "REDIS: <message>"
+  - "COMPUTATION: <message>"
+  - "API: <message>"
+  - "MISC: <message>"
+
+- Have time-stamped logs by month. E.g. "genenetwork2-08-2022.error.log" and "genenetwork2-08-2022.info.log".  This way in future, we can actually run an analysis on what breaks often in GN2.
-- 
cgit v1.2.3


From b83285f518421f2e223ca6bd95332d82c270f7fa Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Tue, 23 Aug 2022 21:42:56 +0300
Subject: Update issue on 404 logs

---
 issues/genenetwork/issue-404-in-logs.gmi | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/issues/genenetwork/issue-404-in-logs.gmi b/issues/genenetwork/issue-404-in-logs.gmi
index 0006896..8e69838 100644
--- a/issues/genenetwork/issue-404-in-logs.gmi
+++ b/issues/genenetwork/issue-404-in-logs.gmi
@@ -1,6 +1,4 @@
-# 404 error in logs
-
-We get many 404's in GN logs. Can we rewire that so no log entries appear as a full stack dump?
+# Better Logging
 
 ## Tags
 
@@ -14,12 +12,9 @@ We get many 404's in GN logs. Can we rewire that so no log entries appear as a f
 
 => https://flask.palletsprojects.com/en/2.0.x/errorhandling/
 
-Some of those 404's in our log
-mean that we forgot to package something; for
-example:
+Some of those 404's in our log mean that we forgot to package something; for example:
 
-=>
-https://git.genenetwork.org/guix-bioinformatics/guix-bioinformatics/commit/e80fe4ddcf15e21004b8135cf8af34b458697f64
+=> https://git.genenetwork.org/guix-bioinformatics/guix-bioinformatics/commit/e80fe4ddcf15e21004b8135cf8af34b458697f64
 
 Removing the 404's would prevent us from catching important errors if ever they occur. I suggest we fix the 404's; some of them have a cascading effect, like the font-awesome missing "webfonts" folder I just fixed that leads to a lot of unnecessary 404s.
 
-- 
cgit v1.2.3


From cfdcd069af40a0cc55014c4ba8c154b41d8cce87 Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Tue, 23 Aug 2022 22:14:25 +0300
Subject: Fix sub-lists in "better-logging"

---
 topics/better-logging.gmi | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/topics/better-logging.gmi b/topics/better-logging.gmi
index 4c216aa..9ca8e42 100644
--- a/topics/better-logging.gmi
+++ b/topics/better-logging.gmi
@@ -23,16 +23,23 @@ We prioritise maintaining user functionality over speed in GN [with time this sp
 ## Actual Design
 
 - Configure logger to separate logs into different files:
+
   - INFO file - contains initial bootstrap messages and instrumentation we may have to do.
+
   - ERROR file - contains real errors.
 
 - Have those settings - the different log files - be part of GN2 start-up script.
 
 - For error messages, use the following format for different error messages:
+
   - "DATABASE: <message>"
+
   - "REDIS: <message>"
+
   - "COMPUTATION: <message>"
+
   - "API: <message>"
+
   - "MISC: <message>"
 
 - Have time-stamped logs by month. E.g. "genenetwork2-08-2022.error.log" and "genenetwork2-08-2022.info.log".  This way in future, we can actually run an analysis on what breaks often in GN2.
-- 
cgit v1.2.3


From 6eae4be03f06c29ceb9f167d6b95653378f39087 Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Tue, 23 Aug 2022 23:15:34 +0300
Subject: Update better-logging topic

---
 topics/better-logging.gmi | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/topics/better-logging.gmi b/topics/better-logging.gmi
index 9ca8e42..ea9fd26 100644
--- a/topics/better-logging.gmi
+++ b/topics/better-logging.gmi
@@ -6,6 +6,8 @@ We prioritise maintaining user functionality over speed in GN [with time this sp
 
 ## Goals
 
+- Have script-friendly error/info logs.
+
 - Remove noise from GN2.
 
 - Separate logging into different files: error logs, info logs.  Add this somewhere with Flask itself instead of re-directing STDOUT to a file.
@@ -43,3 +45,5 @@ We prioritise maintaining user functionality over speed in GN [with time this sp
   - "MISC: <message>"
 
 - Have time-stamped logs by month. E.g. "genenetwork2-08-2022.error.log" and "genenetwork2-08-2022.info.log".  This way in future, we can actually run an analysis on what breaks often in GN2.
+
+- Get rid of "utility.logger" module and replace it with Flask's or Python's in-built logging.
-- 
cgit v1.2.3


From c7a3db98d06d2088ed8fb6656587600a406e8d8f Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Thu, 25 Aug 2022 00:09:23 +0530
Subject: Remove --user argument from openconnect-sso invocation.

---
 topics/uthsc-vpn-with-free-software.gmi | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/topics/uthsc-vpn-with-free-software.gmi b/topics/uthsc-vpn-with-free-software.gmi
index 05f389b..6131004 100644
--- a/topics/uthsc-vpn-with-free-software.gmi
+++ b/topics/uthsc-vpn-with-free-software.gmi
@@ -4,9 +4,9 @@ It is possible to connect to the UTHSC VPN using only free software. For this, y
 => https://github.com/vlaci/openconnect-sso/ openconnect-sso
 => https://www.infradead.org/openconnect/ openconnect
 
-To connect, run openconnect-sso as follows and enter your password when prompted. A browser window will pop up for you to complete the Duo authentication. Once done, you will be connected to the VPN.
+To connect, run openconnect-sso as follows. A browser window will pop up for you to complete the Duo authentication. Once done, you will be connected to the VPN.
 ```
-$ openconnect-sso --server uthscvpn1.uthsc.edu --user your-netid --authgroup UTHSC
+$ openconnect-sso --server uthscvpn1.uthsc.edu --authgroup UTHSC
 ```
 
 ## Avoid tunneling all your network traffic through the VPN (aka Split Tunneling)
@@ -16,7 +16,7 @@ openconnect, by default, tunnels all your traffic through the VPN. This is not g
 
 For example, to connect to the UTHSC VPN but only access the hosts tux01 and tux02e through the VPN, run the following command.
 ```
-$ openconnect-sso --server uthscvpn1.uthsc.edu --user your-netid --authgroup UTHSC -- --script 'vpn-slice tux01 tux02e'
+$ openconnect-sso --server uthscvpn1.uthsc.edu --authgroup UTHSC -- --script 'vpn-slice tux01 tux02e'
 ```
 The vpn-slice script looks up the hostnames tux01 and tux02e on the VPN DNS and adds /etc/hosts entries and routes to your system. vpn-slice can also set up more complicated routes. To learn more, read the vpn-slice documentation.
 
-- 
cgit v1.2.3


From e7fa0bb076aa30f86e228c05f73a134bb7f1d636 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Thu, 25 Aug 2022 00:09:49 +0530
Subject: Add note about sudo use with openconnect-sso.

---
 topics/uthsc-vpn-with-free-software.gmi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/topics/uthsc-vpn-with-free-software.gmi b/topics/uthsc-vpn-with-free-software.gmi
index 6131004..abb7e4a 100644
--- a/topics/uthsc-vpn-with-free-software.gmi
+++ b/topics/uthsc-vpn-with-free-software.gmi
@@ -8,6 +8,7 @@ To connect, run openconnect-sso as follows. A browser window will pop up for you
 ```
 $ openconnect-sso --server uthscvpn1.uthsc.edu --authgroup UTHSC
 ```
+Note that openconnect-sso should be run as a regular user, not as root. After passing Duo authentication, openconnect-sso will try to gain root privileges to set up the network routes. At that point, it will prompt you for your password using sudo.
 
 ## Avoid tunneling all your network traffic through the VPN (aka Split Tunneling)
 
-- 
cgit v1.2.3


From 9e44dd42b15cc40fe788adbf6ff78c0da4ee9448 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Thu, 25 Aug 2022 00:10:03 +0530
Subject: Add note about vpn-slice packaging status in Guix.

---
 topics/uthsc-vpn-with-free-software.gmi | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/topics/uthsc-vpn-with-free-software.gmi b/topics/uthsc-vpn-with-free-software.gmi
index abb7e4a..8faaffb 100644
--- a/topics/uthsc-vpn-with-free-software.gmi
+++ b/topics/uthsc-vpn-with-free-software.gmi
@@ -21,6 +21,9 @@ $ openconnect-sso --server uthscvpn1.uthsc.edu --authgroup UTHSC -- --script 'vp
 ```
 The vpn-slice script looks up the hostnames tux01 and tux02e on the VPN DNS and adds /etc/hosts entries and routes to your system. vpn-slice can also set up more complicated routes. To learn more, read the vpn-slice documentation.
 
+Note that the vpn-slice package is not yet packaged for Guix. However, there is a pending patch at
+=> https://issues.guix.gnu.org/57351 Guix issue tracking vpn-splice packaging
+
 ## Acknowledgement
 
 Many thanks to Pjotr Prins and Erik Garrison without whose earlier work this guide would not be possible.
-- 
cgit v1.2.3


From 033cc2228d9277c4dd42e8d9160b7d2966dc2bc7 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Thu, 25 Aug 2022 00:13:53 +0530
Subject: Document qtwebengine workaround for openconnect-sso.

---
 topics/uthsc-vpn-with-free-software.gmi | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/topics/uthsc-vpn-with-free-software.gmi b/topics/uthsc-vpn-with-free-software.gmi
index 8faaffb..ca0d8df 100644
--- a/topics/uthsc-vpn-with-free-software.gmi
+++ b/topics/uthsc-vpn-with-free-software.gmi
@@ -24,6 +24,15 @@ The vpn-slice script looks up the hostnames tux01 and tux02e on the VPN DNS and
 Note that the vpn-slice package is not yet packaged for Guix. However, there is a pending patch at
 => https://issues.guix.gnu.org/57351 Guix issue tracking vpn-splice packaging
 
+## qtwebengine text rendering bug
+
+There is currently a bug in Guix with qtwebengine text rendering.
+=> https://issues.guix.gnu.org/52672
+This causes text to not render in the Duo authentication browser window. Until this bug is fixed, work around it by setting the following environment variable.
+```
+export QTWEBENGINE_CHROMIUM_FLAGS=--disable-seccomp-filter-sandbox
+```
+
 ## Acknowledgement
 
 Many thanks to Pjotr Prins and Erik Garrison without whose earlier work this guide would not be possible.
-- 
cgit v1.2.3


From 574900c30dc5576d04c5306187df4fe2e1785ef0 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Thu, 25 Aug 2022 11:39:50 +0530
Subject: Use exceptions to indicate errors.

---
 topics/use-exceptions-to-indicate-errors.gmi | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 topics/use-exceptions-to-indicate-errors.gmi

diff --git a/topics/use-exceptions-to-indicate-errors.gmi b/topics/use-exceptions-to-indicate-errors.gmi
new file mode 100644
index 0000000..e302dd3
--- /dev/null
+++ b/topics/use-exceptions-to-indicate-errors.gmi
@@ -0,0 +1,16 @@
+# Use exceptions to indicate errors
+
+Often, we indicate that a function has encountered an error by returning a None value. Here's why this is a bad idea and why you should use exceptions instead.
+
+When we return None values to indicate errors, we have to take care to check the return value of every function call and propagate errors higher and higher up the function call stack until we reach a point where the error is handled. This clutters up the code, and is one reason why writing correct code in languages like C that don't have exceptions is a pain.
+
+With exceptions, we only have to create an exception handler (try/except block in Python) at the highest level. Any exceptions raised by functions below that level are automatically passed on to the except block with no additional programmer effort.
+
+Here's an example where we run mapping, and if there's an error, we return an error page. Else, we return the results page. Notice that we do not check the return value template_vars.
+```
+try:
+    template_vars = run_mapping.RunMapping(start_vars, temp_uuid)
+    return render_template("mapping_results.html", **template_vars)
+except:
+    return render_template("mapping_error.html")
+```
-- 
cgit v1.2.3


From 4bc1c4c4c7563d51b1258409de0d6663dfa5c726 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Thu, 25 Aug 2022 11:40:08 +0530
Subject: Collect coding standards documents.

---
 topics/coding-guidelines.gmi | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 topics/coding-guidelines.gmi

diff --git a/topics/coding-guidelines.gmi b/topics/coding-guidelines.gmi
new file mode 100644
index 0000000..47cb697
--- /dev/null
+++ b/topics/coding-guidelines.gmi
@@ -0,0 +1,8 @@
+# Coding guidelines
+
+We aim to adhere to the following coding guidelines.
+
+=> /topics/use-exceptions-to-indicate-errors Exceptions, not None return values
+=> /topics/better-logging Log messages
+
+This document is an index of other documents describing coding guidelines. Add more here as you write/discover them.
-- 
cgit v1.2.3


From dfacbce921f752098b7b3360c99b2448ce2c1bb5 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 26 Aug 2022 10:22:34 +0200
Subject: Updated tasks

---
 issues/genenetwork/global-search.gmi | 13 ++++++++-----
 tasks/ongoing.gmi                    | 28 ++++++++++++++++++++++++++++
 tasks/zsloan.gmi                     |  4 +---
 3 files changed, 37 insertions(+), 8 deletions(-)
 create mode 100644 tasks/ongoing.gmi

diff --git a/issues/genenetwork/global-search.gmi b/issues/genenetwork/global-search.gmi
index a39da80..156145d 100644
--- a/issues/genenetwork/global-search.gmi
+++ b/issues/genenetwork/global-search.gmi
@@ -1,16 +1,19 @@
 # Global search problems
 
-Global search is the top bar of GN2
+Global search is the top bar of GN2.
+
+Note we are replacing search with xapian. So this is less important.
 
 ## Tags
 
 * assigned: pjotrp, zsloan
-* status: unclear
-* priority: critical
+* status: later
+* priority: low
 * type: bug
 * keywords: global search, BRCA2
 
 ## Tasks
 
-* [ ] BRCA2 does not render results in table
-* [ ] 'Brca2' with quotes gives a SQL error
+* [X] BRCA2 does not render results in table
+* [ ] 'Brca2' with quotes gives a SQL error, see
+=> http://genenetwork.org/gsearch?type=gene&terms=%27Brca2%27
diff --git a/tasks/ongoing.gmi b/tasks/ongoing.gmi
new file mode 100644
index 0000000..799f5ee
--- /dev/null
+++ b/tasks/ongoing.gmi
@@ -0,0 +1,28 @@
+# Ongoing Work
+
+In this document we track ongoing work.
+
+## Tags
+
+* kanban: pjotrp
+* assigned: pjotrp
+* status: ongoing
+* keywords: pjotrp, to remove
+
+## Notes
+
+## Tasks
+
+Ongoing
+
+* [ ] Rust correlations and materialised view (Fred, Alex)
+* [ ] Fix missing GENO file (Zach, Pjotr, Gregory)
+* [ ] Bringing Xapian Search to GN (Arun)
+* [ ] New genotype format (Arun, Pjotr, Bonface)
+* [ ] RDF and virtuoso endpoint (Bonface, Arun, Pjotr)
+* [ ] Data upload and curation (Fred, Arthur)
+* [ ] Deployment container for GN2 (Arun)
+* [ ] GeneNetwork paper (Pjotr, Rob)
+* [ ] Creating a live monitor/sanity checker for GN (Arun)
+* [ ] Pangenome genotype correction with vcflib (Pjotr)
+* [ ] Becoming a DOI provider (Pjotr)
diff --git a/tasks/zsloan.gmi b/tasks/zsloan.gmi
index 30cba44..4e425e6 100644
--- a/tasks/zsloan.gmi
+++ b/tasks/zsloan.gmi
@@ -3,13 +3,11 @@
 ## Tags
 
 * assigned: zsloan
-* keywords: global search, genewiki
+* keywords: genewiki
 * status: unclear
 * priority: unclear
 
 ## Tasks
 
-* [ ] Fix global search
-=> ../issues/genenetwork/global-search.gmi fix global search
 * [ ] GN1 migrate genewiki pages
 => ../issues/genenetwork/genewiki.gmi
-- 
cgit v1.2.3


From f3d4e05fc859920ee2311116573dd8cfd3438fc2 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 26 Aug 2022 04:44:37 -0500
Subject: Add issue on missing geno file as a task for Zach

---
 tasks/zsloan.gmi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tasks/zsloan.gmi b/tasks/zsloan.gmi
index 4e425e6..2da0c55 100644
--- a/tasks/zsloan.gmi
+++ b/tasks/zsloan.gmi
@@ -9,5 +9,7 @@
 
 ## Tasks
 
+* [ ] Geno files incomplete?
+=> http://gn2.genenetwork.org/api/v_pre1//genotypes/HSNIH-Palmer.geno
 * [ ] GN1 migrate genewiki pages
 => ../issues/genenetwork/genewiki.gmi
-- 
cgit v1.2.3


From a6299a491b89d45c5d43d4044ecdc2e5e351cfdd Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 26 Aug 2022 05:52:42 -0500
Subject: One GeneWiki

---
 issues/genenetwork/genewiki.gmi | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/issues/genenetwork/genewiki.gmi b/issues/genenetwork/genewiki.gmi
index e0a0a00..ce54afb 100644
--- a/issues/genenetwork/genewiki.gmi
+++ b/issues/genenetwork/genewiki.gmi
@@ -25,3 +25,40 @@ with an edit button, similar to
 * keywords: GN1, documentation
 
 ## Tasks
+
+* [ ] Export Genewiki to markdown - one file per gene and store in git@github.com:genenetwork/gn-docs.git
+* [ ] Format output for GN using markdown parser (similar to other docs)
+* [ ] Provide edit link to github
+
+Later we'll add automated links to wikidata and Uniprot etc.
+
+## Notes
+
+Zach writes: How exactly do we want to store all of this? It appears to currently be
+stored across three SQL tables - GeneRIF, GeneRIFXRef, and GeneCategory.
+The first contains a row for each item a user adds (when displaying all
+items it queries by gene symbol), and the latter two are for storing the
+checkbox stuff (so there will presumably be a row in GeneRIFXRef for every
+checked box for each symbol, though this isn't totally clear to me because
+it's linked by GeneRIF.Id - which isn't unique - rather than GeneRIF.symbol
+which is what I would have assumed).
+
+IIRC the issue I ran into (that isn't immediately apparent from looking at
+the web page) is that it's currently stored as a list of items. There isn't
+a single "free text" area - when a user edits they are either adding a new
+text item with its own row in the DB or editing one of the existing items,
+so I'm not sure how best to reasonably convert the current contents and
+editing method to markdown. Currently it doesn't even support any sort of
+user styling/formatting - users just enter basic text into a form. And if
+they were converted to markdown, how would we be storing the checkbox
+content?
+
+It's probably possible to write a script that goes through those tables and
+generates a bunch of markdown files from them (one for each gene symbol, I
+think?), with the list of items just being converted into a single markdown
+file with those items formatted into a list. This would de-link GN1's
+GeneWiki from GN2's in the future, though (since the way things are stored
+would be fundamentally changed).
+
+Pj: That is what we want. Create a markdown file for each gene symbol.
+Checklist can be part of that using markdown syntax.
-- 
cgit v1.2.3


From 185ab8cc3e621f3257b525a8ca5ae7a2281d055e Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Mon, 29 Aug 2022 12:32:05 +0300
Subject: Update topic on better logging.

* topics/better-logging.gmi: Update design-doc.
---
 topics/better-logging.gmi | 44 +++++++++++---------------------------------
 1 file changed, 11 insertions(+), 33 deletions(-)

diff --git a/topics/better-logging.gmi b/topics/better-logging.gmi
index ea9fd26..8de3fb3 100644
--- a/topics/better-logging.gmi
+++ b/topics/better-logging.gmi
@@ -6,44 +6,22 @@ We prioritise maintaining user functionality over speed in GN [with time this sp
 
 ## Goals
 
-- Have script-friendly error/info logs.
-
-- Remove noise from GN2.
-
-- Separate logging into different files: error logs, info logs.  Add this somewhere with Flask itself instead of re-directing STDOUT to a file.
+* Have script-friendly error/info logs.
+* Remove noise from GN2.
+* Separate logging into different files: error logs, info logs.  Add this somewhere with Flask itself instead of re-directing STDOUT to a file.
 
 ### Non-goals
 
-- Logging in GN3.
-
-- Parsing logs to extract goals.
-
-- Getting rid of "gn.db" global object and in so doing removing "MySqlAlchemy" [that we really shouldn't be using].
-
-- Adding log messages to existing functions.
+* Logging in GN3.
+* Parsing logs to extract goals.
+* Getting rid of "gn.db" global object and in so doing removing "MySqlAlchemy" [that we really shouldn't be using].
+* Adding log messages to existing functions.
 
 ## Actual Design
 
-- Configure logger to separate logs into different files:
-
-  - INFO file - contains initial bootstrap messages and instrumentation we may have to do.
-
-  - ERROR file - contains real errors.
-
-- Have those settings - the different log files - be part of GN2 start-up script.
-
-- For error messages, use the following format for different error messages:
-
-  - "DATABASE: <message>"
-
-  - "REDIS: <message>"
-
-  - "COMPUTATION: <message>"
-
-  - "API: <message>"
-
-  - "MISC: <message>"
+* Get rid of "utility.logger" module and replace it with Flask's or Python's in-built logging.
+* Configure the logging system to automatically add the module name, line number, time-stamps etc.
 
-- Have time-stamped logs by month. E.g. "genenetwork2-08-2022.error.log" and "genenetwork2-08-2022.info.log".  This way in future, we can actually run an analysis on what breaks often in GN2.
+## Resources
 
-- Get rid of "utility.logger" module and replace it with Flask's or Python's in-built logging.
+=> https://realpython.com/python-logging/ Logging in Python
-- 
cgit v1.2.3


From 0340128c2b67684b356f64b42483cf268ccb3e12 Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Mon, 29 Aug 2022 12:33:13 +0300
Subject: Make task as done with a '[x]'

* issues/sql-too-many-connections.gmi: Mark tasks as done.
---
 issues/sql-too-many-connections.gmi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/issues/sql-too-many-connections.gmi b/issues/sql-too-many-connections.gmi
index 68ed23d..93b8587 100644
--- a/issues/sql-too-many-connections.gmi
+++ b/issues/sql-too-many-connections.gmi
@@ -8,8 +8,8 @@
 
 ## Tasks
 
-* [ ] Figure out root cause
-* [ ] Send patch
+* [x] Figure out root cause
+* [x] Send patch
 
 ## Description
 
-- 
cgit v1.2.3


From aaa313ba866721059541c6589ecb2d6092eb03ca Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Mon, 29 Aug 2022 12:39:44 +0300
Subject: Create new issue wrt removing sqlalchemy

* issues/sqlalchemy.gmi: New issue.
---
 issues/sqlalchemy.gmi | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 issues/sqlalchemy.gmi

diff --git a/issues/sqlalchemy.gmi b/issues/sqlalchemy.gmi
new file mode 100644
index 0000000..e3ea894
--- /dev/null
+++ b/issues/sqlalchemy.gmi
@@ -0,0 +1,59 @@
+# Replace sqlalchemy with MySQLdb
+
+## Tags
+
+* assigned: bonfacem, zachs
+* type: refactor
+* priority: medium
+
+## Description
+
+Connections that use sqlalchemy are the only places in GN2 where connections remain "open" indefinitely until a connection is closed.  In the event that we have many users at the same time, say like during one of Rob's classes; and they do a search, we have N connections indefinitely open until their sessions are killed.  Removing that is trivial; to demonstrate, here is a random example from GN2 (/wqflask/wqflask/search_results.py):
+
+```
+def get_GO_symbols(a_search):
+    query = """SELECT genes
+	       FROM GORef
+	       WHERE goterm='{0}:{1}'""".format(a_search['key'], a_search['search_term'][0])
+
+    gene_list = g.db.execute(query).fetchone()[0].strip().split()
+
+    new_terms = []
+    for gene in gene_list:
+	this_term = dict(key=None,
+			 separator=None,
+			 search_term=[gene])
+
+	new_terms.append(this_term)
+
+    return new_terms
+```
+
+could be replaced with:
+
+```
+ def get_GO_symbols(a_search):
+-    query = """SELECT genes
+-               FROM GORef
+-               WHERE goterm='{0}:{1}'""".format(a_search['key'], a_search['search_term'][0])
+-
+-    gene_list = g.db.execute(query).fetchone()[0].strip().split()
+-
+-    new_terms = []
+-    for gene in gene_list:
+-        this_term = dict(key=None,
+-                         separator=None,
+-                         search_term=[gene])
+-
+-        new_terms.append(this_term)
+-
+-    return new_terms
++    genes = []
++    with database_connection() as conn:
++        with conn.cursor() as cursor:
++            cursor.execute("SELECT genes FROM GORef WHERE goterm=%s",
++                         (a_search.get("key")))
++            genes = cursor.fetchone()[0].strip().split()
++    return [dict(key=None, separator=None, search_term=[gene])
++            for gene in genes]
+```
-- 
cgit v1.2.3


From 79db33c930e5d679c8d4da3740e374a82b7c6be8 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Tue, 30 Aug 2022 15:21:35 +0530
Subject: Close genodb tests issue.

---
 issues/tests-for-genodb.gmi | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/issues/tests-for-genodb.gmi b/issues/tests-for-genodb.gmi
index e4398d2..957dca7 100644
--- a/issues/tests-for-genodb.gmi
+++ b/issues/tests-for-genodb.gmi
@@ -8,3 +8,9 @@ The genodb genotype database implementation is explained in detail at
 => /topics/genotype-database.html
 
 * assigned: aruni
+
+## Resolution
+
+Tests have now been written.
+
+* closed
-- 
cgit v1.2.3


From c0e505604a56a86d9c9e78f0823de95e0bcfb40b Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Wed, 31 Aug 2022 18:47:08 +0530
Subject: Remove note about vpn-slice packaging status.

---
 topics/uthsc-vpn-with-free-software.gmi | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/topics/uthsc-vpn-with-free-software.gmi b/topics/uthsc-vpn-with-free-software.gmi
index ca0d8df..1593c3a 100644
--- a/topics/uthsc-vpn-with-free-software.gmi
+++ b/topics/uthsc-vpn-with-free-software.gmi
@@ -21,9 +21,6 @@ $ openconnect-sso --server uthscvpn1.uthsc.edu --authgroup UTHSC -- --script 'vp
 ```
 The vpn-slice script looks up the hostnames tux01 and tux02e on the VPN DNS and adds /etc/hosts entries and routes to your system. vpn-slice can also set up more complicated routes. To learn more, read the vpn-slice documentation.
 
-Note that the vpn-slice package is not yet packaged for Guix. However, there is a pending patch at
-=> https://issues.guix.gnu.org/57351 Guix issue tracking vpn-splice packaging
-
 ## qtwebengine text rendering bug
 
 There is currently a bug in Guix with qtwebengine text rendering.
-- 
cgit v1.2.3


From 795ba2ffb5ed5150004785768b8b8c479b24b197 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 2 Sep 2022 07:22:08 -0500
Subject: Collapsed P2 resolving

---
 topics/systems/migrate-p2.gmi    | 12 ++++++++++++
 topics/systems/orchestration.gmi | 31 +++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 topics/systems/migrate-p2.gmi
 create mode 100644 topics/systems/orchestration.gmi

diff --git a/topics/systems/migrate-p2.gmi b/topics/systems/migrate-p2.gmi
new file mode 100644
index 0000000..c7fcb90
--- /dev/null
+++ b/topics/systems/migrate-p2.gmi
@@ -0,0 +1,12 @@
+* Penguin2 crash
+
+This week the boot partition of P2 crashed. We have a few lessons here, not least having a fallback for all services ;)
+
+* Tasks
+
+- [ ] setup space.uthsc.edu for GN2 development
+- [ ] update DNS to tux02 128.169.4.52 and space 128.169.5.175
+- [ ] move CI/CD to tux02
+
+
+* Notes
diff --git a/topics/systems/orchestration.gmi b/topics/systems/orchestration.gmi
new file mode 100644
index 0000000..336dbbd
--- /dev/null
+++ b/topics/systems/orchestration.gmi
@@ -0,0 +1,31 @@
+* Orchestration and fallbacks
+
+After the Penguin2 crash in Aug. 2022 it has become increasingly clear how hard it is to deploy GeneNetwork. GNU Guix helps a great deal with dependencies, but it does not handle orchestration between machines/services well. Also we need to look at the future.
+
+What is GN today in terms of services
+
+ 1. Main GN2 server (Python, 20+ processes, 3+ instances: depends on all below)
+ 2. Matching GN3 server and REST endpoint (Python: less dependencies)
+ 3. Mariadb
+ 4. redis
+ 5. virtuoso
+ 6. GN-proxy (Racket, authentication handler: redis, mariadb)
+ 7. Alias proxy (Racket, gene aliases wikidata)
+ 8. Jupyter R and Julia notebooks
+ 9. BNW server (Octave)
+10. UCSC browser
+11. GN1 instances (older python, 12 instances in principle, 2 running today)
+12. Access to HPC for GEMMA (coming)
+13. Backup services
+14. monitoring services
+
+I am still missing a few! All run by a man and his diligent dog.
+
+For the future the orchestration needs to be more robust and resilient. This means:
+
+ 1. A fallback for every service on a separate machine
+ 2. Improved privacy protection for (future) human data
+ 3. Separate servers serving different data sources
+ 4. Partial synchronization between data sources
+
+The only way we *can* scale is by adding machines. But the system is not yet ready for that. Also getting rid of monolithic primary databases in favor of files helps synchronization.
-- 
cgit v1.2.3


From e3908e51e6a6f982b9b812ed0b839b553fb376f0 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 3 Sep 2022 04:25:49 -0500
Subject: tux01 OOM issue

---
 issues/systems/tux01-ram-problem.gmi | 108 +++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 issues/systems/tux01-ram-problem.gmi

diff --git a/issues/systems/tux01-ram-problem.gmi b/issues/systems/tux01-ram-problem.gmi
new file mode 100644
index 0000000..fd7848c
--- /dev/null
+++ b/issues/systems/tux01-ram-problem.gmi
@@ -0,0 +1,108 @@
+# tux01 running out of RAM
+
+Tux01 ran out of steam.
+
+## Tags
+
+* assigned: pjotrp, zsloan
+* type: bug
+* keywords: database
+* status: unclear
+* priority: high
+
+## Tasks
+
+* [X] post-mortem (see below)
+* [ ] update nvme firmware
+* [ ] convert remaining tables to innodb
+* [ ] monitor mariadb internals
+* [ ] find out what can have caused an OOM
+
+## Notes
+
+Some post mortem:
+
+* GN1 uses 10% of RAM, that is a bit high
+* Other services behaving fine
+* dirs look fine though /home only has 80G left
+* dmesg shows serial console crashes
+  + kthread starved
+  + RIP: 0010:serial8250_console_write+0x3d/0x2b0
+  + Out of memory: Kill process 4361 (mysqld)
+  + mysqld was using 14006154 pages of 4096 size = 53Gb RAM
+* daemon log shows restart of mysql at 2am
+* syslog: Sep  3 02:07:01 tux01 kernel: [18254757.549855] oom_reaper: reaped process 4361 (mysqld), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB
+
+On to the mysql logs, after the crash
+
+* 2022-09-03  2:07:50 68 [ERROR] mysqld: Table './db_webqtl/CaseAttributeXRefNew' is marked as crashed and should be repaired
+* 2022-09-03  2:07:50 9 [ERROR] mysqld: Table './db_webqtl/GeneRIF' is marked as crashed and should be repaired
+
+right before the crash
+
+* 2022-09-03  2:05:16 0 [Note] InnoDB: page_cleaner: 1000ms intended loop took 5476ms. The settings might not be optimal. (flushed=0 and evicted=0, during the time.)
+
+The mysql slow query log shows a number of slow queries before 2am. Which is not normal. So it seems it was leading up to a crash. Most of these queries refer to GeneRIF:
+
+```
+# Time: 220903  1:25:34
+# User@Host: webqtlout[webqtlout] @  [128.169.4.67]
+# Query_time: 772.880949  Lock_time: 0.001206  Rows_sent: 157432  Rows_examined: 472318
+# Rows_affected: 0  Bytes_sent: 29887236
+SET timestamp=1662168334;
+select distinct Species.FullName, GeneRIF_BASIC.GeneId, GeneRIF_BASIC.comment, GeneRIF_BASIC.PubMed_ID from GeneRIF_BASIC, Species where GeneRIF_BASIC.symbol=''OR ELT(6676=1964,1964) AND '4YK4' LIKE '4YK4' and GeneRIF_BASIC.SpeciesId = Species.Id order by Species.Id, GeneRIF_BASIC.createtime;
+# Time: 220903  1:26:31
+```
+
+Let's try to run that by hand. It returns 157432 rows in set (2.523 sec). So it is fine now. It might be that on reboot the table got fixed, but we'll check the tables anyway. First take a look at the state of the engine itself as described in
+
+=> ../database-not-responding.gmi
+
+Also
+
+```
+MariaDB [db_webqtl]> CHECK TABLE GeneRIF;
++-------------------+-------+----------+----------+
+| Table             | Op    | Msg_type | Msg_text |
++-------------------+-------+----------+----------+
+| db_webqtl.GeneRIF | check | status   | OK       |
++-------------------+-------+----------+----------+
+1 row in set (0.014 sec)
+```
+
+So the tables were repaired on restarting mariadb - something we set it up to do. We should convert these tables to innodb (from myisam), but I have been postponing that until we have a large enough SSD for mariadb.
+
+## Check RAID and disks
+
+/dev/sda is on a PERC H740P Adp. controller. A quick search shows that there are no real known issues with these RAID controllers after 4 years. Pretty impressive.
+
+The following show no errors logged:
+
+```
+hdparm -I /dev/sda
+smartctl -a /dev/sda -d megaraid,0
+```
+
+Same for disk /dev/sdb and
+
+```
+smartctl -x /dev/nvme0
+smartctl -x /dev/nvme1
+```
+
+It looks like there is nothing to worry about.
+
+A search for nvme 'Dell Express Flash PM1725a problems' shows an issue where disks go offline; it can be solved with the firmware update Dell Express Flash NVMe PCIe SSD PM1725a, version 1.1.2, A03.
+We are on 1.0.4.
+
+=> https://www.dell.com/support/kbdoc/fr-fr/000177934/dell-technologies-a-pm1725a-may-go-offline-with-various-errors-including-nvme-remove-namespaces?lang=en
+
+Dell engineers have observed an infrequent issue during system operations, using the Dell PM1725a Express Flash NVMe PCIe SSD, in which the device may go offline and remain inaccessible. The drive may be accessible again after a reboot.
+
+The disks /dev/sda and /dev/sdb are Model Family: Seagate Barracuda 2.5 5400 Device Model:     ST5000LM000-2AN170 and appear to be behaving well.
+
+## Conclusion
+
+No real problems surface on those checks. So it looks like a table went out of whack and killed mariadb. It does not explain the RAM issue though. Why did the OOM killer kill mariadb at 50Gb? It was the largest process, but not all RAM was used.
+
+Recommendations: see tasks above
-- 
cgit v1.2.3


From dbec2c34435511b492b3c6a66e4b935da447090e Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 3 Sep 2022 04:31:44 -0500
Subject: tux01 OOM issue - update tasks

---
 issues/systems/tux01-ram-problem.gmi | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/issues/systems/tux01-ram-problem.gmi b/issues/systems/tux01-ram-problem.gmi
index fd7848c..90b37a0 100644
--- a/issues/systems/tux01-ram-problem.gmi
+++ b/issues/systems/tux01-ram-problem.gmi
@@ -5,14 +5,15 @@ Tux01 ran out of steam.
 ## Tags
 
 * assigned: pjotrp, zsloan
-* type: bug
+* type: systems
 * keywords: database
 * status: unclear
-* priority: high
+* priority: medium
 
 ## Tasks
 
 * [X] post-mortem (see below)
+* [ ] free up disk space
 * [ ] update nvme firmware
 * [ ] convert remaining tables to innodb
 * [ ] monitor mariadb internals
-- 
cgit v1.2.3


From c216fc75dbffe3e9ace4369a59256f0c93f72368 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 3 Sep 2022 04:42:45 -0500
Subject: orchestration: adding services

---
 topics/systems/orchestration.gmi | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/topics/systems/orchestration.gmi b/topics/systems/orchestration.gmi
index 336dbbd..4f4c877 100644
--- a/topics/systems/orchestration.gmi
+++ b/topics/systems/orchestration.gmi
@@ -16,8 +16,11 @@ What is GN today in terms of services
 10. UCSC browser
 11. GN1 instances (older python, 12 instances in principle, 2 running today)
 12. Access to HPC for GEMMA (coming)
-13. Backup services
-14. monitoring services
+13. Backup services (sheepdog, rsync, borg)
+14. monitoring services (incl. systemd, gunicorn, shepherd, sheepdog)
+15. mail server
+16. https certificates
+17. http(s) proxy (nginx)
 
 I am still missing a few! All run by a man and his diligent dog.
 
-- 
cgit v1.2.3


From cfe8b6a84d55949e13c92c622053d6b83468e681 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 3 Sep 2022 10:47:07 -0500
Subject: DNS update

---
 topics/systems/dns-changes.gmi | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/topics/systems/dns-changes.gmi b/topics/systems/dns-changes.gmi
index 7c42589..a535cab 100644
--- a/topics/systems/dns-changes.gmi
+++ b/topics/systems/dns-changes.gmi
@@ -9,15 +9,22 @@ We are moving thing to a new DNS hosting service. We have accounts on both. To m
 * Sign in to your GoDaddy account.
 * Export the DNS record to a file
 * Print the DNS settings to a PDF
-* Start a transfer from DNSsimple to get an auth code
+* On GoDaddy disable WHOIS privacy protection (on the domains table)
+* On GoDaddy start a transfer from DNSimple to get an auth code
   + Click your username at the top right of the page.
   + Select My Products.
   + Click Manage next to the relevant domain.
   + Scroll down to Additional Settings.
   + Click Get authorization code. Note: If you have more than 6 domains in your account, click Email my code
-  + Set transfer on DNSsimple - tick DNS box
-  + Check DNS on switch - it may not be completely automatic
-  + Cherk record on DNSsimple
-  + Check transfer with `dig systemsgenetics.org NS`
 * On DNSimple add the authorisation code under Tamara
-* Import DNS settings on DNSimple
+  + Set transfer on DNSimple - tick DNS box
+  + Check the `DNS on' switch - it may not be completely automatic
+  + Check record on DNSimple
+  + Check transfer with `dig systemsgenetics.org NS`
+* Import DNS settings on DNSimple (cut-N-paste)
+  + Edit delegation - make sure the delegation box is set
+=> https://support.dnsimple.com/articles/delegating-dnsimple-registered
+* Test
+  + dig systemsgenetics.org [NS]
+  + dig systemsgenetics.org @ns1.dnsimple.com NS
+  + whois systemsgenetics.org
-- 
cgit v1.2.3


From f52cfbb325ad28cd743ea94b83859977f0063230 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 5 Sep 2022 08:57:16 -0500
Subject: orchestration

---
 topics/systems/orchestration.gmi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/topics/systems/orchestration.gmi b/topics/systems/orchestration.gmi
index 4f4c877..5e0a298 100644
--- a/topics/systems/orchestration.gmi
+++ b/topics/systems/orchestration.gmi
@@ -21,6 +21,7 @@ What is GN today in terms of services
 15. mail server
 16. https certificates
 17. http(s) proxy (nginx)
+18. CI/CD server (with github webhooks)
 
 I am still missing a few! All run by a man and his diligent dog.
 
-- 
cgit v1.2.3