aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md24
-rw-r--r--conn.scm4
-rw-r--r--dump/schema-dump.scm11
-rwxr-xr-xexamples/dump-species-metadata.scm10
-rw-r--r--schema/species.ttl8
5 files changed, 39 insertions, 18 deletions
diff --git a/README.md b/README.md
index 0a4ada8..caecddc 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,7 @@ badge](https://ci.genenetwork.org/badge/dump-genenetwork-database-tests.svg)](ht
[![dump-genenetwork-database CI
badge](https://ci.genenetwork.org/badge/dump-genenetwork-database.svg)](https://ci.genenetwork.org/jobs/dump-genenetwork-database)
-The GeneNetwork database is being migrated from a relational database to
-a plain text and RDF database. This repository contains code to dump the
-relational database to plain text.
+This repository contains code to dump the metadata in the GeneNetwork relational database to RDF. It requires a connection to a SQL server.
# Using
@@ -15,6 +13,12 @@ Drop into a development environment with
$ guix shell -m manifest.scm
```
+If the path is not picked up, add the following to your shell session:
+
+```
+export PATH=$GUIX_ENVIRONMENT/bin:$PATH
+```
+
Build the sources.
``` shell
@@ -74,22 +78,22 @@ Here's a sample *conn.scm*.
## Dump the database
-Then, to dump the database to \~/data/dump, run
+Then, to dump the database to \~/data/dump, run the following from inside the guix shell:
-``` shell
-$ guix shell -m manifest.scm -- ./pre-inst-env ./examples/dump-dataset-metadata.scm conn.scm ~/data/dump-data/
+```sh
+./pre-inst-env ./examples/dump-species-metadata.scm ../conn.scm ~/tmp
```
-Make sure there is enough free space! It\'s best to dump the database on
-penguin2 where disk space and bandwidth are not significant
-constraints.
+``` shell
+$ guix shell -m manifest.scm -- ./pre-inst-env ./examples/dump-dataset-metadata.scm ../conn.scm ~/tmp
+```
## Validate and load dump
Then, validate the dumped RDF using `rapper` and load it into
virtuoso. This will load the dumped RDF into the
`http://genenetwork.org` graph, and will delete all pre-existing data
-in that graph.
+in that graph (FIXME).
``` shell
$ guix shell -m manifest.scm -- rapper --input turtle --count ~/data/dump/dump.ttl
diff --git a/conn.scm b/conn.scm
index 8a552ab..aca2835 100644
--- a/conn.scm
+++ b/conn.scm
@@ -1,11 +1,11 @@
((sql-username . "webqtlout")
- (sql-password . "webqtlout")
+ (sql-password . "*")
(sql-database . "db_webqtl")
(sql-host . "localhost")
(sql-port . 3306)
(virtuoso-port . 8891)
(virtuoso-username . "dba")
- (virtuoso-password . "dba")
+ (virtuoso-password . "*")
(sparql-scheme . http)
(sparql-host . "localhost")
(sparql-port . 8892)
diff --git a/dump/schema-dump.scm b/dump/schema-dump.scm
index 876eafb..86626f4 100644
--- a/dump/schema-dump.scm
+++ b/dump/schema-dump.scm
@@ -1,6 +1,10 @@
(define-module (dump schema)
#:use-module (ice-9 match)
- #:use-module (dump sql))
+ #:use-module (srfi srfi-26) ; cut/cute; SRFI modules live under (srfi ...), not (ice-9 ...)
+ #:use-module (dump sql)
+ #:use-module (dump triples)
+ #:use-module (dump strings)
+ #:use-module (dump table))
(define (dump-table-fields db table)
@@ -90,8 +94,9 @@ is a <table> object."
(table-columns table))))
tables)))
-(define (dump-data-table db table-name data-field)
- (let ((dump-directory (string-append %dump-directory "/" table-name))
+(define* (dump-data-table db table-name data-field
+ #:optional (default-dump-directory ""))
+ (let ((dump-directory (string-append default-dump-directory "/" table-name))
(port #f)
(current-strain-id #f))
(unless (file-exists? dump-directory)
diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm
index 39f7147..6ac2640 100755
--- a/examples/dump-species-metadata.scm
+++ b/examples/dump-species-metadata.scm
@@ -18,6 +18,16 @@
+(define (remap-species-identifiers str)
+  "Remap database species labels to their standard binomial names; any other
+  value is returned unchanged.  Ideally the database itself gets corrected."
+  (or (assoc-ref '(("Fly (Drosophila melanogaster dm6)" . "Drosophila melanogaster")
+                   ("Oryzias latipes (Japanese medaka)" . "Oryzias latipes")
+                   ("Monkey (Macaca nemestrina)" . "Macaca nemestrina")
+                   ("Bat (Glossophaga soricina)" . "Glossophaga soricina"))
+                 str)
+      str))
+
(define-dump dump-species
(tables (Species))
(schema-triples
diff --git a/schema/species.ttl b/schema/species.ttl
index 6b3acaf..cc5b989 100644
--- a/schema/species.ttl
+++ b/schema/species.ttl
@@ -6,14 +6,16 @@
@prefix gn: <http://genenetwork.org/id/> .
gn:Arabidopsis_thaliana rdf:isDefinedBy wd:Q158695 .
+# Bat
gn:Glossophaga_soricina rdf:isDefinedBy wd:Q304929 .
-gn:Drosophila_melanogaster_dm6 rdf:isDefinedBy wd:Q130888 .
+gn:Drosophila_melanogaster rdf:isDefinedBy wd:Q130888 .
gn:Glycine_max rdf:isDefinedBy wd:Q11006 .
gn:Homo_sapiens rdf:isDefinedBy wd:Q15978631 .
gn:Hordeum_vulgare rdf:isDefinedBy wd:Q11577 .
-gn:Macaca_mulatta rdf:isDefinedBy wd:Q177601 .
+gn:Macaca_mulatta rdf:isDefinedBy wd:Q156606 .
gn:Mus_musculus rdf:isDefinedBy wd:Q83310 .
-gn:Japanese_medaka rdf:isDefinedBy wd:Q1142975 .
+# Japanese Medaka
+gn:Oryzias_latipes rdf:isDefinedBy wd:Q1142975 .
gn:Populus_trichocarpa rdf:isDefinedBy wd:Q149382 .
gn:Rattus_norvegicus rdf:isDefinedBy wd:Q184224 .
gn:Solanum_lycopersicum rdf:isDefinedBy wd:Q23501 .