aboutsummaryrefslogtreecommitdiff
path: root/gn/data/species.scm
blob: 100cccfe102351f7f56452a848947f3fc1ce27c5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
(define-module (gn data species)
  #:use-module (json)
  #:use-module (ice-9 match)
  #:use-module (ice-9 format)
  #:use-module (ice-9 iconv)
  #:use-module (ice-9 receive)
  #:use-module (ice-9 string-fun)
  #:use-module (gn db sparql)
  #:use-module (gn data group)
  #:use-module (web gn-uri)

  #:export (
            gnid-species
            get-species
            get-species-meta
            get-species-data
            get-species-shortnames
            get-species-binominal-names
            get-expanded-species
            get-expanded-taxon-meta
            get-expanded-taxon-data
            ))

(define (gnid-species short-name)
  "Find the GN identifier for the species SHORT-NAME, e.g. Mus_musculus.
The identifier is the \"gnid\" entry of the record returned by
`get-expanded-taxon-data', passed through `url-parse-id'."
  (let* ([taxon (get-expanded-taxon-data short-name)]
         [gnid-uri (assoc-ref taxon "gnid")])
    (url-parse-id gnid-uri)))

(define (get-species)
  "Return the species records: the two values of
`memo-sparql-species-meta' are turned into rows with `get-rows',
compiled with `compile-species' (starting from an empty list) and
finally reshaped by `species-digest'."
  (call-with-values
      (lambda () (memo-sparql-species-meta))
    (lambda (names res)
      (species-digest (compile-species '() (get-rows names res))))))

;; The result is a list with one association list per input record; no
;; vector is built here.
(define (species-digest recs)
  "Reshape RECS, a list of (KEY . ENTRIES) pairs, into a list of alists.
Every entry is a list whose first element is a name and whose second
element is a value; it becomes the pair (ID . VALUE), ID being
`url-parse-id' applied to the name.  KEY itself is not kept."
  (let* ([entry->pair (lambda (entry)
                        (cons (url-parse-id (car entry)) (cadr entry)))]
         ;; To keep the key as well, cons (car rec) onto the mapped entries.
         [digest-one (lambda (rec)
                       (map entry->pair (cdr rec)))])
    (map digest-one recs)))

(define (expand-species rec)
  "Return REC extended with taxonomy details obtained from Wikidata.

REC is a species association list.  Its \"22-rdf-syntax-ns#isDefinedBy\"
entry supplies the Wikidata id (via `url-parse-id') and its \"shortName\"
entry the species id (via `normalize-id').

REC is returned unchanged when the Wikidata id is \"unknown\", or when
the Wikidata lookup does not produce a first row of the form
(TAXONOMY-NAME NCBI DESCRIPTION).  Otherwise the entries \"id\",
\"wikidata\", \"taxonomy-id\", \"ncbi-url\", \"uniprot-url\",
\"wikidata-url\", \"wikispecies-url\", \"taxonomy-name\", \"meta\" and
\"description\" are consed in front of REC's own entries."
  (let ([wd-id (url-parse-id (assoc-ref rec "22-rdf-syntax-ns#isDefinedBy"))]
        [short-name (normalize-id (assoc-ref rec "shortName"))])
    (if (string=? wd-id "unknown")
        rec
        ;; wikidata query:
        (receive (names rows) (tsv->scm (memo-sparql-wd-species-info wd-id))
          ;; Only the first result row is used.  Matching on ROWS itself
          ;; (instead of on (car rows)) lets an empty result fall through
          ;; to the catch-all clause rather than raising an error.
          (match rows
            (((taxonomy-name ncbi descr) . _)
             (let* ([ncbi-id (strip-lang ncbi)]
                    [taxon (strip-lang taxonomy-name)]
                    ;; Wikispecies page names use underscores for spaces.
                    [taxonomy-lnk (string-replace-substring taxon " " "_")])
               (cons* `("id" . ,short-name)
                      `("wikidata" . ,wd-id)
                      `("taxonomy-id" . ,ncbi-id)
                      `("ncbi-url" . ,(string-append "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=" ncbi-id))
                      `("uniprot-url" . ,(string-append "https://www.uniprot.org/taxonomy/" ncbi-id))
                      `("wikidata-url" . ,(string-append "http://www.wikidata.org/entity/" wd-id))
                      `("wikispecies-url" . ,(string-append "https://species.wikimedia.org/wiki/" taxonomy-lnk))
                      `("taxonomy-name" . ,taxon)
                      `("meta" . ,(mk-meta short-name))
                      `("description" . ,(strip-lang descr))
                      rec)))
            ;; No row, or a row with an unexpected shape: keep the plain
            ;; record instead of failing the whole request.
            (_ rec))))))


(define (get-expanded-taxon-data short-name)
  "Return the record of the species whose \"shortName\" is SHORT-NAME,
expanded with `expand-species', or #f when no species matches.

Only the first matching record from `get-species' is expanded.  Records
that carry no string \"shortName\" entry are skipped."
  ;; A named let gives the early exit directly; no continuation needs to
  ;; be captured just to stop at the first match.
  (let loop ([recs (get-species)])
    (if (null? recs)
        #f
        (let* ([rec (car recs)]
               [name (assoc-ref rec "shortName")])
          ;; assoc-ref yields #f for a missing key, which string=? rejects,
          ;; so check the type before comparing.
          (if (and (string? name) (string=? name short-name))
              (expand-species rec)
              (loop (cdr recs)))))))


(define (get-expanded-species)
  "Return the list of all records from `get-species', each one passed
through `expand-species'."
  (let ([species (get-species)])
    (map expand-species species)))

(define (get-species-api-str)
  "Return the string produced by `scm->json-string' for a two-element
vector holding the mouse and rat API v2 URLs."
  (let ([endpoints (vector "https://genenetwork.org/api/v2/mouse/"
                           "https://genenetwork.org/api/v2/rat/")])
    (scm->json-string endpoints)))

(define (get-species-shortnames recs)
  "Return, in order, the \"shortName\" value of every association list in
RECS.  A record without that key contributes #f."
  (let ([short-name-of (lambda (rec) (assoc-ref rec "shortName"))])
    (map short-name-of recs)))

(define (get-species-binominal-names recs)
  "Return the list obtained by applying `url-parse-id' to the car of each
element of RECS."
  ;; NOTE(review): the car must be something `url-parse-id' accepts, so
  ;; RECS presumably holds (URI . ENTRIES) pairs rather than the alists
  ;; returned by `get-species' -- confirm against callers.
  (let ([name-of (lambda (rec) (url-parse-id (car rec)))])
    (map name-of recs)))

(define (get-species-meta2 recs)
  "Return an association list with one (SHORT-NAME . META) pair per record
in RECS, where SHORT-NAME is the record's \"shortName\" entry and META is
`mk-meta' applied to it."
  (let ([meta-entry (lambda (rec)
                      (let ([shortname (assoc-ref rec "shortName")])
                        (cons shortname (mk-meta shortname))))])
    (map meta-entry recs)))

(define (get-species-links recs)
  "Return an association list with one (SHORT-NAME . DATA) pair per record
in RECS, where SHORT-NAME is the record's \"shortName\" entry and DATA is
`mk-data' applied to it."
  (let ([data-entry (lambda (rec)
                      (let ([shortname (assoc-ref rec "shortName")])
                        (cons shortname (mk-data shortname))))])
    (map data-entry recs)))

(define (get-species-data)
  "Return all expanded species records (see `get-expanded-species')
collected into a vector."
  (let ([expanded (get-expanded-species)])
    (list->vector expanded)))

(define (get-species-meta)
  "Return the association list describing the species collection: an
\"info\" text, the \"doc\", \"meta\" and \"data\" values built by `mk-doc',
`mk-meta' and `mk-data' for \"species\", an \"up\" value made from
`(prefix)' followed by \"/\", and per-species \"meta-links\" and \"links\"
derived from the expanded species records."
  (let ([recs (get-expanded-species)])
    (list
     (cons "info" "Get information on species by visiting the data link or one of the individual links")
     (cons "doc" (mk-doc "species"))
     (cons "meta" (mk-meta "species"))
     (cons "data" (mk-data "species"))
     (cons "up" (string-append (prefix) "/"))
     (cons "meta-links" (get-species-meta2 recs))
     (cons "links" (get-species-links recs)))))


(define (get-expanded-taxon-meta id)
  "Get information on a specific species ID, e.g. mouse.

Returns an association list with ID itself as \"info\", the \"doc\",
\"meta\" and \"data\" values built from ID, an \"up\" value pointing at the
species meta entry, and two vectors, \"meta-links\" and \"links\", obtained
from `get-group-links' for the species' GN identifier."
  ;; Resolve the GN identifier once: `gnid-species' walks and expands the
  ;; species records, so calling it for each of the two link vectors would
  ;; repeat that work.
  (let ([gnid (gnid-species id)])
    `(("info" . ,id)
      ("doc" . ,(mk-doc id))
      ("meta" . ,(mk-meta id))
      ("data" . ,(mk-data id))
      ("up" . ,(mk-meta "species"))
      ("meta-links" . ,(list->vector
                        (get-group-links gnid
                                         (lambda (r) (mk-meta (url-parse-id r))))))
      ("links" . ,(list->vector
                   (get-group-links gnid
                                    (lambda (r) (mk-data (url-parse-id r)))))))))