aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMuriithi Frederick Muriuki2021-08-04 08:31:38 +0300
committerMuriithi Frederick Muriuki2021-08-04 08:31:38 +0300
commit9e4b8d17443464b673077d43c63f73ff0c6900ea (patch)
tree91cf8e20108f3a802dd81479cad1289c389b24ff
parent238450af8aa3395b3ae5a636fada67206a863d85 (diff)
parent32cfe15f056c0c2958d7da79d7a25e585d9cc13a (diff)
downloadgenenetwork3-9e4b8d17443464b673077d43c63f73ff0c6900ea.tar.gz
Merge branch 'main' of github.com:genenetwork/genenetwork3 into heatmap_decompose_db_retrieval
-rw-r--r--guix.scm15
-rw-r--r--sql/schema.org39
2 files changed, 45 insertions, 9 deletions
diff --git a/guix.scm b/guix.scm
index f20d62a..efef9c1 100644
--- a/guix.scm
+++ b/guix.scm
@@ -31,6 +31,7 @@
(gnu packages databases)
(gnu packages statistics)
(gnu packages bioconductor)
+ (gn packages golang)
(gnu packages python)
(gnu packages python-check)
(gnu packages python-crypto)
@@ -70,8 +71,8 @@
#:recursive? #t
#:select? git-file?))
(propagated-inputs `(("coreutils" ,coreutils)
- ("diffutils" ,diffutils)
("gemma-wrapper" ,gemma-wrapper)
+ ("csvdiff" ,go-github-com-aswinkarthik-csvdiff)
("python" ,python-wrapper)
("python-bcrypt" ,python-bcrypt)
("python-flask" ,python-flask)
@@ -84,15 +85,11 @@
("python-redis" ,python-redis)
("python-requests" ,python-requests)
("python-scipy" ,python-scipy)
- ("python-sqlalchemy-stubs" ,python-sqlalchemy-stubs)
- ("r" ,r)
- ("r-qtl" ,r-qtl)
+ ("python-sqlalchemy-stubs"
+ ,python-sqlalchemy-stubs)
("r-optparse" ,r-optparse)
- ("r-stringi" ,r-stringi)
- ("r-stringr" ,r-stringr)
- ("r-testthat" ,r-testthat)
- ("r-wgcna" ,r-wgcna)
- ))
+ ("r-qtl" ,r-qtl)
+ ("r-stringi" ,r-stringi)))
(build-system python-build-system)
(home-page "https://github.com/genenetwork/genenetwork3")
(synopsis "GeneNetwork3 API for data science and machine learning.")
diff --git a/sql/schema.org b/sql/schema.org
new file mode 100644
index 0000000..2db8a27
--- /dev/null
+++ b/sql/schema.org
@@ -0,0 +1,39 @@
+#+TITLE: GeneNetwork Database Schema
+
+This is an attempt to reverse engineer and understand the schema of the
+GeneNetwork database. The goal is to prune redundant tables, fields, etc. and
+arrive at a simplified schema. This simplified schema will be useful when
+migrating the database.
+
+* Species
+** Id
+ Primary key
+** SpeciesId
+ Looks like a redundant key that many other tables reference as a foreign
+ key. This field should be replaced by Id.
+** SpeciesName
+ Common name of the species. This field can be replaced by MenuName.
+** Name
+ Lowercased common name, used as the key for the species in dictionaries.
+** MenuName
+ Name shown in the Species dropdown menu. This is the SpeciesName, sometimes
+ followed by the reference genome identifier in brackets.
+** FullName
+ Binomial name of the species
+** TaxonomyId
+ Foreign key?
+** OrderId
+ Foreign key?
+
+* Strain
+** Id
+ Primary key
+** Name
+ Name of the strain
+** Name2
+ A second name. For most rows, this is the same as Name. Why is this
+ necessary?
+** SpeciesId
+ Foreign key into the Species table
+** Symbol
+** Alias