about summary refs log tree commit diff
diff options
context:
space:
mode:
author Muriithi Frederick Muriuki 2021-08-04 08:31:38 +0300
committer Muriithi Frederick Muriuki 2021-08-04 08:31:38 +0300
commit 9e4b8d17443464b673077d43c63f73ff0c6900ea (patch)
tree 91cf8e20108f3a802dd81479cad1289c389b24ff
parent 238450af8aa3395b3ae5a636fada67206a863d85 (diff)
parent 32cfe15f056c0c2958d7da79d7a25e585d9cc13a (diff)
download genenetwork3-9e4b8d17443464b673077d43c63f73ff0c6900ea.tar.gz
Merge branch 'main' of github.com:genenetwork/genenetwork3 into heatmap_decompose_db_retrieval
-rw-r--r-- guix.scm 15
-rw-r--r-- sql/schema.org 39
2 files changed, 45 insertions, 9 deletions
diff --git a/guix.scm b/guix.scm
index f20d62a..efef9c1 100644
--- a/guix.scm
+++ b/guix.scm
@@ -31,6 +31,7 @@
  (gnu packages databases)
  (gnu packages statistics)
  (gnu packages bioconductor)
+ (gn packages golang)
  (gnu packages python)
  (gnu packages python-check)
  (gnu packages python-crypto)
@@ -70,8 +71,8 @@
                       #:recursive? #t
                       #:select? git-file?))
   (propagated-inputs `(("coreutils" ,coreutils)
-                       ("diffutils" ,diffutils)
                        ("gemma-wrapper" ,gemma-wrapper)
+                       ("csvdiff" ,go-github-com-aswinkarthik-csvdiff)
                        ("python" ,python-wrapper)
                        ("python-bcrypt" ,python-bcrypt)
                        ("python-flask" ,python-flask)
@@ -84,15 +85,11 @@
                        ("python-redis" ,python-redis)
                        ("python-requests" ,python-requests)
                        ("python-scipy" ,python-scipy)
-                       ("python-sqlalchemy-stubs" ,python-sqlalchemy-stubs)
-                       ("r" ,r)
-                       ("r-qtl" ,r-qtl)
+                       ("python-sqlalchemy-stubs"
+                        ,python-sqlalchemy-stubs)
                        ("r-optparse" ,r-optparse)
-                       ("r-stringi" ,r-stringi)
-                       ("r-stringr" ,r-stringr)
-                       ("r-testthat" ,r-testthat)
-                       ("r-wgcna" ,r-wgcna)
-                       ))
+                       ("r-qtl" ,r-qtl)
+                       ("r-stringi" ,r-stringi)))
   (build-system python-build-system)
   (home-page "https://github.com/genenetwork/genenetwork3")
   (synopsis "GeneNetwork3 API for data science and machine learning.")
diff --git a/sql/schema.org b/sql/schema.org
new file mode 100644
index 0000000..2db8a27
--- /dev/null
+++ b/sql/schema.org
@@ -0,0 +1,39 @@
+#+TITLE: GeneNetwork Database Schema
+
+This is an attempt to reverse engineer and understand the schema of the
+GeneNetwork database. The goal is to prune redundant tables, fields, etc. and
+arrive at a simplified schema. This simplified schema will be useful when
+migrating the database.
+
+* Species
+** Id
+   Primary key
+** SpeciesId
+   Looks like a redundant key referred to as a foreign key from many other
+   tables. This field should be replaced by Id.
+** SpeciesName
+   Common name of the species. This field can be replaced by MenuName.
+** Name
+   Lowercase common name, used as the key for the species in dictionaries.
+** MenuName
+   Name in the Species dropdown menu. This is the SpeciesName, but sometimes
+   with the reference genome identifier mentioned in brackets.
+** FullName
+   Binomial name of the species
+** TaxonomyId
+   Foreign key?
+** OrderId
+   Foreign key?
+
+* Strain
+** Id
+   Primary key
+** Name
+   Name of the strain
+** Name2
+   A second name. For most rows, this is the same as Name. Why is this
+   necessary?
+** SpeciesId
+   Foreign key into the Species table
+** Symbol
+** Alias