diff options
Diffstat (limited to 'gn')
| -rw-r--r-- | gn/data/dataset.scm | 61 | ||||
| -rw-r--r-- | gn/data/genotype.scm | 1 | ||||
| -rw-r--r-- | gn/db/mysql.scm | 14 | ||||
| -rwxr-xr-x[-rw-r--r--] | gn/db/sparql.scm | 82 | ||||
| -rw-r--r-- | gn/runner/gemma.scm | 15 |
5 files changed, 121 insertions, 52 deletions
diff --git a/gn/data/dataset.scm b/gn/data/dataset.scm index c28cf25..afe75ba 100644 --- a/gn/data/dataset.scm +++ b/gn/data/dataset.scm @@ -4,14 +4,21 @@ #:use-module (ice-9 iconv) #:use-module (ice-9 receive) #:use-module (ice-9 string-fun) + #:use-module (srfi srfi-1) #:use-module (dbi dbi) #:use-module (gn db mysql) + #:use-module (gn data genotype) #:use-module (gn data group) #:use-module (gn util convert) #:use-module (web gn-uri) + #:use-module (rnrs base) ; for assert #:export ( dataset-name + get-bxd-publish-list + get-bxd-publish-values-list + get-bxd-publish-name-value-dict + get-bxd-publish-dataid-name-value-dict )) (define (get-dataset db probesetfreeze-id) @@ -22,3 +29,57 @@ (define (dataset-name db probesetfreeze-id) (assoc-ref (get-dataset db probesetfreeze-id) "Name")) + +(define (get-dataid-from-publishxrefid id) + "Get the internal dataid from publishxref - which is the same as used in the GN2 web interface" + (call-with-db + (lambda (db) + (let [(query (string-append "SELECT Id,PhenotypeId,DataId FROM PublishXRef WHERE Id=" id " AND InbredSetId=1 LIMIT 1"))] + (dbi-query db query) + (pk (int-to-string (assoc-ref (get-row db) "DataId"))))))) + +(define (get-bxd-publish-list) + (call-with-db + (lambda (db) + (let [(query "SELECT Id,PhenotypeId,DataId FROM PublishXRef WHERE InbredSetId=1")] + (dbi-query db query) + (get-rows db '()))))) + +(define* (get-bxd-publish-values-list dataid #:optional used-for-mapping?) + "Returns dict of name values , e.g. [{\"Name\":\"C57BL/6J\",\"value\":9.136},{\"Name\":\"DBA/2J\",\"value\":4.401},{\"Name\":\"BXD9\",\"value\":4.36}, ... used-for-mapping? skips the founders and maybe other unmappable inds. Note, currently unused." + (call-with-db + (lambda (db) + (let [(query (string-append "SELECT Strain.Name, PublishData.value FROM Strain, PublishData WHERE PublishData.Id=" dataid " and Strain.Id=StrainID;"))] + (dbi-query db query) + (if used-for-mapping? + (remove null? (pk (get-rows-apply db + (lambda (r) + (if (string-contains (assoc-ref r "Name") "BXD") + `(("Name" . ,(assoc-ref r "Name")) ("value" . ,(assoc-ref r "value"))) + '() ) ;; return empty on no match + ) '()))) + (get-rows db '()) + ))))) + +(define* (get-bxd-publish-dataid-name-value-dict dataid #:optional used-for-mapping?) + "Returns dict of name values, e.g. (((\"C57BL/6J\" . 9.136) (\"DBA/2J\" . 4.401) (\"BXD9\" . 4.36) ... used-for-mapping? skips the founders and maybe other unmappable inds." + (call-with-db + (lambda (db) + (let [(query (string-append "SELECT Strain.Name, PublishData.value FROM Strain, PublishData WHERE PublishData.Id=" dataid " and Strain.Id=StrainID;"))] + (dbi-query db query) + (if used-for-mapping? + (remove null? (pk (get-rows-apply db + (lambda (r) + (if (string-contains (assoc-ref r "Name") "BXD") + `(,(assoc-ref r "Name") . ,(assoc-ref r "value")) + '() ) ;; return empty on no match + ) '()))) + (remove null? (pk (get-rows-apply db + (lambda (r) + `(,(assoc-ref r "Name") . ,(assoc-ref r "value")) + ) '()))) + ))))) + +(define* (get-bxd-publish-name-value-dict id #:optional used-for-mapping?) + "Same as above function, but starting from data id" + (get-bxd-publish-dataid-name-value-dict (get-dataid-from-publishxrefid id) used-for-mapping?)) diff --git a/gn/data/genotype.scm b/gn/data/genotype.scm index c7cb63c..5574382 100644 --- a/gn/data/genotype.scm +++ b/gn/data/genotype.scm @@ -16,6 +16,7 @@ )) (define (geno-inds-bxd fn) + "Returns information from GN's BXD.json, note it fetches the first geno file info, now BXD.8.geno" (let [(js (call-with-input-file fn (lambda (port) (json->scm port))))] diff --git a/gn/db/mysql.scm b/gn/db/mysql.scm index ccd414a..8da7b60 100644 --- a/gn/db/mysql.scm +++ b/gn/db/mysql.scm @@ -32,22 +32,26 @@ ;; (display "===> OPENING DB") ;; (newline) (let [(db (dbi-open "mysql" "webqtlout:webqtlout:db_webqtl:tcp:127.0.0.1:3306"))] - (ensure db) + (ensure db "Can't open connection") db ))) (define (call-with-db thunk) - (thunk (db-open))) + (let* [(db (db-open)) + (result (thunk db))] + (dbi-close db) + result)) -(define (ensure db) +(define (ensure db msg1) "Use DBI-style handle to report an error. On error the program will stop." (match (dbi-get_status db) ((stat . msg) (if (= stat 0) #t (begin - (display msg) + (display "SQL Connection ERROR! ") + (display (string-append msg1 " - " msg) (newline) - (assert stat)))))) + (assert #f))))))) (define (has-result? db) "Return #t or #f if result is valid" diff --git a/gn/db/sparql.scm b/gn/db/sparql.scm index bd7a306..fbcd2cc 100644..100755 --- a/gn/db/sparql.scm +++ b/gn/db/sparql.scm @@ -18,10 +18,11 @@ the case. #:use-module (json) #:use-module (srfi srfi-1) #:use-module (web client) + #:use-module (web http) #:use-module (web gn-uri) #:use-module (web request) + #:use-module (web response) #:use-module (web uri) - #:export (memo-sparql-species memo-sparql-species-meta sparql-species-meta @@ -37,9 +38,11 @@ the case. strip-lang make-table make-pairs - ) -) + sparql-http-get + sparql-by-term)) +(define virtuoso-endpoint + (or (getenv "SPARQL-ENDPOINT") "http://localhost:8890/sparql/")) (define (strip-lang s) "Strip quotes and language tag (@en) from RDF entries" @@ -58,9 +61,9 @@ the case. (define (gn-sparql-prefix query) (string-append " -PREFIX gn: <http://genenetwork.org/id/> -PREFIX gnt: <http://genenetwork.org/term/> -PREFIX gnc: <http://genenetwork.org/category/> +PREFIX gn: <http://rdf.genenetwork.org/v1/id/> +PREFIX gnt: <http://rdf.genenetwork.org/v1/term/> +PREFIX gnc: <http://rdf.genenetwork.org/v1/category/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " query)) @@ -108,46 +111,7 @@ Note this procedure works for GN, but does not yet work for wikidata" (define (tsv->scm text) "Split a TSV string into a list of fields. Returns list of names header) and rows" (let ([lst (map (lambda (f) (string-split f #\tab) ) (delete "" (string-split text #\newline)))]) - (values (car lst) (cdr lst)) - )) - -#! -(define-values (names res) (sparql-species-meta)) -(define table (get-rows names res)) -(define recs '()) -(define h (compile-species recs table)) -(assoc "http://genenetwork.org/species_drosophila_melanogaster" h) -(assoc-ref h "http://genenetwork.org/id/Drosophila_melanogaster") -(define d (car h)) -(assoc-ref (list d) "http://genenetwork.org/species_drosophila_melanogaster") - -(scm->json #(1 (("2" . 3)))) -;; [1,{"2":3}] -(scm->json #("http://genenetwork.org/species_drosophila_melanogaster" (("http://genenetwork.org/menuName" . "Drosophila") ("http://genenetwork.org/binomialName" . "Drosophila melanogaster") ))) -;; ["http://genenetwork.org/species_drosophila_melanogaster",{"http://genenetwork.org/menuName":"Drosophila","http://genenetwork.org/binomialName":"Drosophila melanogaster"}] -l -;; (("http://genenetwork.org/menuName" "Drosophila") ("http://genenetwork.org/name" "Drosophila") ("http://genenetwork.org/binomialName" "Drosophila melanogaster")) -(scm->json (map (lambda (i) (cons (car i) (car (cdr i)))) l)) -;; {"http://genenetwork.org/menuName":"Drosophila","http://genenetwork.org/name":"Drosophila","http://genenetwork.org/binomialName":"Drosophila melanogaster"} - - -curl -G https://query.wikidata.org/sparql -H "Accept: application/json; charset=utf-8" --data-urlencode query="SELECT DISTINCT * where { - wd:Q158695 wdt:P225 ?o . -} limit 5" -{ - "head" : { - "vars" : [ "o" ] - }, - "results" : { - "bindings" : [ { - "o" : { - "type" : "literal", - "value" : "Arabidopsis thaliana" - } - } ] - } -} -!# + (values (car lst) (cdr lst)))) (define (sparql-wd-species-info species) "Returns wikidata entry for species, e.g.: @@ -326,3 +290,29 @@ dump-species-metadata.ttl:gn:Axbxa gnt:belongsToSpecies gn:Mus_musculus . " gnid " ?key ?value . # FILTER ( !EXISTS{ " gnid " gnt:hasTissue ?value }) }"))) + + +(define* (sparql-http-get endpoint-url query #:optional (mime-type "text/microdata+html")) + (receive (response-status response-body) + (http-request + (format #f "~a?default-graph-uri=&query=~a&format=~a" + endpoint-url (uri-encode query) (uri-encode mime-type)) + #:method 'GET) + (values + (build-response + #:code (response-code response-status) + #:headers `((content-type . ,(parse-header 'content-type mime-type)))) + response-body))) + +(define (sparql-by-term prefix val) + (let ((url-alist '((gn . "<http://rdf.genenetwork.org/v1/id/>") + (gnc . "<http://rdf.genenetwork.org/v1/category/>") + (gnt . "<http://rdf.genenetwork.org/v1/term/>")))) + (format #f "PREFIX ~a: ~a + +CONSTRUCT { + ~a:~a ?p ?o . +} FROM <http://rdf.genenetwork.org/v1> +WHERE { + ~a:~a ?p ?o . +}" prefix (assoc-ref url-alist prefix) prefix val prefix val))) diff --git a/gn/runner/gemma.scm b/gn/runner/gemma.scm index 9a5c0fc..c577305 100644 --- a/gn/runner/gemma.scm +++ b/gn/runner/gemma.scm @@ -10,11 +10,24 @@ #:use-module (rnrs base) #:export ( - write-pheno-file + gemma-pheno-txt invoke-gemma-wrapper-loco run-gemma )) +(define (gemma-pheno-txt family traits) + "Return a list of values for GEMMA" + (assert (string=? family "BXD")) ; only supported right now + (define bxd-inds (geno-inds-bxd "BXD.json")) + (assert (= 235 (length bxd-inds))) + (map (lambda (ind) + (let [(value (assoc-ref traits ind))] + (if value + (format #f "~a" value) + "NA\n") + )) + bxd-inds)) + (define (write-pheno-file fn traits) (define bxd-inds (geno-inds-bxd "BXD.json")) (assert (= 235 (length bxd-inds))) |
