about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--README.md24
-rw-r--r--conn.scm4
-rw-r--r--dump/schema-dump.scm11
-rwxr-xr-xexamples/dump-species-metadata.scm10
-rw-r--r--schema/species.ttl8
5 files changed, 39 insertions, 18 deletions
diff --git a/README.md b/README.md
index 0a4ada8..caecddc 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,7 @@ badge](https://ci.genenetwork.org/badge/dump-genenetwork-database-tests.svg)](ht
 [![dump-genenetwork-database CI
 badge](https://ci.genenetwork.org/badge/dump-genenetwork-database.svg)](https://ci.genenetwork.org/jobs/dump-genenetwork-database)
 
-The GeneNetwork database is being migrated from a relational database to
-a plain text and RDF database. This repository contains code to dump the
-relational database to plain text.
+This repository contains code to dump the metadata in the GeneNetwork relational database to RDF. It requires a connection to a SQL server.
 
 # Using
 
@@ -15,6 +13,12 @@ Drop into a development environment with
 $ guix shell -m manifest.scm
 ```
 
+If the environment's `bin` directory is not picked up in your `PATH`, add
+
+```
+export PATH=$GUIX_ENVIRONMENT/bin:$PATH
+```
+
 Build the sources.
 
 ``` shell
@@ -74,22 +78,22 @@ Here's a sample *conn.scm*.
 
 ## Dump the database
 
-Then, to dump the database to \~/data/dump, run
+Then, to dump the database to \~/data/dump, run the following inside the Guix shell
 
-``` shell
-$ guix shell -m manifest.scm -- ./pre-inst-env ./examples/dump-dataset-metadata.scm conn.scm ~/data/dump-data/
+```sh
+./pre-inst-env ./examples/dump-species-metadata.scm ../conn.scm ~/tmp
 ```
 
-Make sure there is enough free space! It\'s best to dump the database on
-penguin2 where disk space and bandwidth are not significant
-constraints.
+``` shell
+$ guix shell -m manifest.scm -- ./pre-inst-env ./examples/dump-dataset-metadata.scm ../conn.scm ~/tmp
+```
 
 ## Validate and load dump
 
 Then, validate the dumped RDF using `rapper` and load it into
 virtuoso. This will load the dumped RDF into the
 `http://genenetwork.org` graph, and will delete all pre-existing data
-in that graph.
+in that graph (FIXME).
 
 ``` shell
 $ guix shell -m manifest.scm -- rapper --input turtle --count ~/data/dump/dump.ttl
diff --git a/conn.scm b/conn.scm
index 8a552ab..aca2835 100644
--- a/conn.scm
+++ b/conn.scm
@@ -1,11 +1,11 @@
 ((sql-username . "webqtlout")
- (sql-password . "webqtlout")
+ (sql-password . "*")
  (sql-database . "db_webqtl")
  (sql-host . "localhost")
  (sql-port . 3306)
  (virtuoso-port . 8891)
  (virtuoso-username . "dba")
- (virtuoso-password . "dba")
+ (virtuoso-password . "*")
  (sparql-scheme . http)
  (sparql-host . "localhost")
  (sparql-port . 8892)
diff --git a/dump/schema-dump.scm b/dump/schema-dump.scm
index 876eafb..86626f4 100644
--- a/dump/schema-dump.scm
+++ b/dump/schema-dump.scm
@@ -1,6 +1,10 @@
 (define-module (dump schema)
   #:use-module (ice-9 match)
-  #:use-module (dump sql))
+  #:use-module (srfi srfi-26)  ; cut/cute; SRFIs live under (srfi ...), not (ice-9 ...)
+  #:use-module (dump sql)
+  #:use-module (dump triples)
+  #:use-module (dump strings)
+  #:use-module (dump table))
 
 
 (define (dump-table-fields db table)
@@ -90,8 +94,9 @@ is a <table> object."
                             (table-columns table))))
               tables)))
 
-(define (dump-data-table db table-name data-field)
-  (let ((dump-directory (string-append %dump-directory "/" table-name))
+(define* (dump-data-table db table-name data-field
+                          #:optional (default-dump-directory ""))
+  (let ((dump-directory (string-append default-dump-directory "/" table-name))
         (port #f)
         (current-strain-id #f))
     (unless (file-exists? dump-directory)
diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm
index 39f7147..6ac2640 100755
--- a/examples/dump-species-metadata.scm
+++ b/examples/dump-species-metadata.scm
@@ -18,6 +18,16 @@
 
 
 
+(define (remap-species-identifiers str)
+  "Return the binomial name for a known legacy species label, else STR."
+  ;; Stop-gap lookup table; the proper fix is to correct the database.
+  (let ((binomial-names
+         '(("Fly (Drosophila melanogaster dm6)" . "Drosophila melanogaster")
+           ("Oryzias latipes (Japanese medaka)" . "Oryzias latipes")
+           ("Monkey (Macaca nemestrina)" . "Macaca nemestrina")
+           ("Bat (Glossophaga soricina)" . "Glossophaga soricina"))))
+    (or (assoc-ref binomial-names str) str)))
+
 (define-dump dump-species
   (tables (Species))
   (schema-triples
diff --git a/schema/species.ttl b/schema/species.ttl
index 6b3acaf..cc5b989 100644
--- a/schema/species.ttl
+++ b/schema/species.ttl
@@ -6,14 +6,16 @@
 @prefix gn: <http://genenetwork.org/id/> .
 
 gn:Arabidopsis_thaliana rdf:isDefinedBy wd:Q158695 .
+# Bat
 gn:Glossophaga_soricina rdf:isDefinedBy wd:Q304929 .
-gn:Drosophila_melanogaster_dm6 rdf:isDefinedBy wd:Q130888 .
+gn:Drosophila_melanogaster rdf:isDefinedBy wd:Q130888 .
 gn:Glycine_max rdf:isDefinedBy wd:Q11006 .
 gn:Homo_sapiens rdf:isDefinedBy wd:Q15978631 .
 gn:Hordeum_vulgare rdf:isDefinedBy wd:Q11577 .
-gn:Macaca_mulatta rdf:isDefinedBy wd:Q177601 .
+gn:Macaca_mulatta rdf:isDefinedBy wd:Q156606 .
 gn:Mus_musculus rdf:isDefinedBy wd:Q83310 .
-gn:Japanese_medaka rdf:isDefinedBy wd:Q1142975 .
+# Japanese Medaka
+gn:Oryzias_latipes rdf:isDefinedBy wd:Q1142975 .
 gn:Populus_trichocarpa rdf:isDefinedBy wd:Q149382 .
 gn:Rattus_norvegicus rdf:isDefinedBy wd:Q184224 .
 gn:Solanum_lycopersicum rdf:isDefinedBy wd:Q23501 .