(define-module (gn data species)
  #:use-module (json)
  #:use-module (ice-9 match)
  #:use-module (ice-9 format)
  #:use-module (ice-9 iconv)
  #:use-module (ice-9 receive)
  #:use-module (ice-9 string-fun)
  #:use-module (gn db sparql)
  #:use-module (gn data group)
  #:use-module (web gn-uri)
  #:export (gnid-species
            get-species
            get-species-meta
            get-species-data
            get-species-shortnames
            get-species-binominal-names
            get-expanded-species
            get-expanded-taxon-meta
            get-expanded-taxon-data))

(define (gnid-species short-name)
  "Find the GN identifier from shortname, e.g. Mus_musculus.

The species is looked up with `get-expanded-taxon-data'; the result is
`url-parse-id' applied to the gnid field of that record."
  ;; NOTE(review): `get-expanded-taxon-data' returns #f when no species
  ;; matches SHORT-NAME, in which case `assoc-ref' below is applied to #f.
  ;; Confirm that callers only pass known short names.
  (let ([rec (get-expanded-taxon-data short-name)])
    (url-parse-id (assoc-ref rec "gnid"))))

(define (get-species)
  "Return the list of species records.

The result of `memo-sparql-species-meta' is passed through `get-rows' and
`compile-species', and then flattened by `species-digest' into one
association list per species."
  (receive (names res) (memo-sparql-species-meta)
    (let* ([table (get-rows names res)]
           [compiled (compile-species '() table)])
      (species-digest compiled))))

;; Each record becomes a list of pairs.  The outer container is a list;
;; `get-species-data' is what turns it into a vector.
(define (species-digest recs)
  "Turn every (KEY . FIELDS) entry of RECS into an association list.

Each element of FIELDS is a list: its first item, run through
`url-parse-id', becomes the key of a pair and its second item becomes the
value.  KEY itself is dropped."
  (map (lambda (entry)
         ;; To keep the key as well, use
         ;;   (cons (car entry)
         ;;         (map (lambda (i) (cons (car i) (cadr i))) (cdr entry)))
         (map (lambda (field)
                (cons (url-parse-id (car field)) (cadr field)))
              (cdr entry)))
       recs))

(define (expand-species rec)
  "Return REC extended with taxonomy details looked up on Wikidata.

The Wikidata id comes from the 22-rdf-syntax-ns#isDefinedBy field of REC.
When that id is the string unknown, REC is returned unchanged.  Otherwise
the first row of the Wikidata result must hold exactly three columns
(taxonomy name, NCBI id, description), and the keys id, wikidata,
taxonomy-id, ncbi-url, uniprot-url, wikidata-url, wikispecies-url,
taxonomy-name, meta and description are prepended to REC."
  (let ([wd-id (url-parse-id (assoc-ref rec "22-rdf-syntax-ns#isDefinedBy"))]
        [short-name (normalize-id (assoc-ref rec "shortName"))])
    (if (string=? wd-id "unknown")
        rec
        ;; wikidata query:
        ;; NOTE(review): `car' fails if the query yields no rows and `match'
        ;; fails if the row does not have three columns -- confirm that the
        ;; query always returns exactly that shape.
        (receive (names rows) (tsv->scm (memo-sparql-wd-species-info wd-id))
          (match (car rows)
            ((taxonomy-name ncbi descr)
             (let* ([ncbi-id (strip-lang ncbi)]
                    [taxon (strip-lang taxonomy-name)]
                    ;; Wikispecies page names use underscores for spaces.
                    [taxonomy-lnk (string-replace-substring taxon " " "_")])
               ;; `append' copies only the new entries; REC is shared as
               ;; the tail of the result.
               (append
                `(("id" . ,short-name)
                  ("wikidata" . ,wd-id)
                  ("taxonomy-id" . ,ncbi-id)
                  ("ncbi-url" . ,(string-append "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=" ncbi-id))
                  ("uniprot-url" . ,(string-append "https://www.uniprot.org/taxonomy/" ncbi-id))
                  ("wikidata-url" . ,(string-append "http://www.wikidata.org/entity/" wd-id))
                  ("wikispecies-url" . ,(string-append "https://species.wikimedia.org/wiki/" taxonomy-lnk))
                  ("taxonomy-name" . ,taxon)
                  ("meta" . ,(mk-meta short-name))
                  ("description" . ,(strip-lang descr)))
                rec))))))))

(define (get-expanded-taxon-data short-name)
  "Here we add information related to one taxonomy species.

Return the expanded record (see `expand-species') of the first species
whose shortName equals SHORT-NAME, or #f when there is none."
  (let loop ([recs (get-species)])
    (cond
     [(null? recs) #f]
     [(let ([name (assoc-ref (car recs) "shortName")])
        ;; A record without a shortName cannot match; skip it rather than
        ;; letting `string=?' fail on #f and abort the whole lookup.
        (and (string? name) (string=? name short-name)))
      (expand-species (car recs))]
     [else (loop (cdr recs))])))

(define (get-expanded-species)
  "Here we add information related to each species.

Return the list obtained by applying `expand-species' to every record of
`get-species'."
  (map expand-species (get-species)))

(define (get-species-api-str)
  "Return a JSON array string holding the mouse and rat API v2 URLs."
  (scm->json-string #("https://genenetwork.org/api/v2/mouse/"
                      "https://genenetwork.org/api/v2/rat/")))

(define (get-species-shortnames recs)
  "Return the shortName field of every record in RECS."
  (map (lambda (rec) (assoc-ref rec "shortName")) recs))

(define (get-species-binominal-names recs)
  "Return `url-parse-id' applied to the first element of every record in
RECS."
  (map (lambda (rec) (url-parse-id (car rec))) recs))

(define (get-species-meta2 recs)
  "Return a list of (SHORTNAME . URI) pairs, one per record in RECS, where
URI is `mk-meta' applied to the record's shortName."
  (map (lambda (rec)
         (let ([shortname (assoc-ref rec "shortName")])
           (cons shortname (mk-meta shortname))))
       recs))

(define (get-species-links recs)
  "Return a list of (SHORTNAME . URI) pairs, one per record in RECS, where
URI is `mk-data' applied to the record's shortName."
  (map (lambda (rec)
         (let ([shortname (assoc-ref rec "shortName")])
           (cons shortname (mk-data shortname))))
       recs))

(define (get-species-data)
  "Return all expanded species records (see `get-expanded-species') as a
vector."
  (list->vector (get-expanded-species)))

(define (get-species-meta)
  "Return the association list describing the species collection: an info
text, the doc, meta, data and up links, and per-species meta-links and
links built from the expanded species records."
  (let ([recs (get-expanded-species)])
    `(("info" . "Get information on species by visiting the data link or one of the individual links")
      ("doc" . ,(mk-doc "species"))
      ("meta" . ,(mk-meta "species"))
      ("data" . ,(mk-data "species"))
      ("up" . ,(string-append (prefix) "/"))
      ("meta-links" . ,(get-species-meta2 recs))
      ("links" . ,(get-species-links recs)))))

(define (get-expanded-taxon-meta id)
  "Get information on a specific species, e.g. mouse.

Return an association list with ID as info, the doc, meta and data links
for ID, the species collection as up, and two vectors built from
`get-group-links' for the species' GN identifier: meta-links (through
`mk-meta') and links (through `mk-data')."
  ;; Resolve the GN identifier once; both link vectors use the same value.
  (let ([gnid (gnid-species id)])
    `(("info" . ,id)
      ("doc" . ,(mk-doc id))
      ("meta" . ,(mk-meta id))
      ("data" . ,(mk-data id))
      ("up" . ,(mk-meta "species"))
      ("meta-links" . ,(list->vector
                        (get-group-links gnid
                                         (lambda (r) (mk-meta (url-parse-id r))))))
      ("links" . ,(list->vector
                   (get-group-links gnid
                                    (lambda (r) (mk-data (url-parse-id r)))))))))