about summary refs log tree commit diff
path: root/gn
diff options
context:
space:
mode:
author Pjotr Prins 2023-08-18 15:41:28 +0200
committer Pjotr Prins 2023-08-18 15:41:28 +0200
commit afd3d25530401569a5953dc1c411f43a56ebd02c (patch)
tree 95188317effce4e1b90e898b1d4f582cb033c848 /gn
parent b9fb2c69136fe243fc9c6f7c3c4f8917814a401e (diff)
download gn-guile-afd3d25530401569a5953dc1c411f43a56ebd02c.tar.gz
Adding test infrastructure and reorganizing modules - still not happy with (web uri)
Diffstat (limited to 'gn')
-rw-r--r-- gn/data/species.scm 112
-rw-r--r-- gn/db/sparql.scm 14
2 files changed, 125 insertions, 1 deletions
diff --git a/gn/data/species.scm b/gn/data/species.scm
new file mode 100644
index 0000000..a1eb15f
--- /dev/null
+++ b/gn/data/species.scm
@@ -0,0 +1,112 @@
+(define-module (gn data species)
+  #:use-module (json)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 format)
+  #:use-module (ice-9 iconv)
+  #:use-module (ice-9 receive)
+  #:use-module (ice-9 string-fun)
+  #:use-module (gn db sparql)
+  #:use-module (web gn-uri)
+
+  #:export (
+            get-species-meta
+            get-species-data
+            get-species-shortnames
+            ))
+
+(define (get-species)
+  "Run the species metadata query and return the digested records."
+  ;; compile-species receives an empty record list plus the table that
+  ;; get-rows builds from the two values returned by the query.
+  (receive (header body) (memo-sparql-species-meta)
+    (species-digest
+     (compile-species '() (get-rows header body)))))
+
+;; Digest compiled species records: the result is a list holding one
+;; association list per input record (the record key is dropped).
+(define (species-digest recs)
+  (define (field->pair field)
+    ;; The first element of FIELD goes through url-parse-id and becomes
+    ;; the key; the second element becomes the value.
+    (cons (url-parse-id (car field)) (cadr field)))
+  ;; To keep the key, cons (car rec) onto the mapped fields instead.
+  (map (lambda (rec) (map field->pair (cdr rec)))
+       recs))
+
+(define (expand-species rec)
+  "Return REC extended with Wikidata-derived ids, names and URLs.  REC comes
+back unchanged when its id is \"unknown\" or the query gives no usable row."
+  (let ([wd-id (url-parse-id (assoc-ref rec "22-rdf-syntax-ns#isDefinedBy"))]
+        [short-name (normalize-id (assoc-ref rec "shortName"))])
+    (if (string=? wd-id "unknown")
+        rec
+        ;; wikidata query: only the first result row is used
+        (receive (names rows) (tsv->scm (memo-sparql-wd-species-info wd-id))
+          (match rows
+            (((taxonomy-name ncbi descr) . _)
+             (let* ([ncbi-id (strip-lang ncbi)]
+                    [tax-name (strip-lang taxonomy-name)]
+                    [taxonomy-lnk (string-replace-substring tax-name " " "_")])
+               (append `(("id" . ,short-name) ("wikidata" . ,wd-id) ("taxonomy-id" . ,ncbi-id)
+                         ("ncbi-url" . ,(string-append "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=" ncbi-id))
+                         ("uniprot-url" . ,(string-append "https://www.uniprot.org/taxonomy/" ncbi-id))
+                         ("wikidata-url" . ,(string-append "http://www.wikidata.org/entity/" wd-id))
+                         ("wikispecies-url" . ,(string-append "https://species.wikimedia.org/wiki/" taxonomy-lnk))
+                         ("taxonomy-name" . ,tax-name)
+                         ("meta" . ,(mk-meta short-name))
+                         ("description" . ,(strip-lang descr)))
+                       rec)))
+            ;; Empty or malformed result: keep the record as it came in.
+            (_ rec))))))
+
+
+(define (get-expanded-species)
+  "Return every record from get-species, each passed through
+expand-species."
+  (let ([species (get-species)])
+    ;; expand-species works on one record at a time.
+    (map expand-species species)))
+
+(define (get-expanded-species1 short-name)
+  "Expand the first species record whose shortName equals SHORT-NAME.
+Returns #f when no record matches."
+  (let scan ([pending (get-species)])
+    (cond
+     [(null? pending) #f]
+     [(string=? (assoc-ref (car pending) "shortName") short-name)
+      (expand-species (car pending))]
+     [else (scan (cdr pending))])))
+
+(define (get-species-api-str)
+  "Return a JSON array string holding the mouse and rat API v2 URLs."
+  (scm->json-string (vector "https://genenetwork.org/api/v2/mouse/" "https://genenetwork.org/api/v2/rat/")))
+
+(define (get-species-shortnames recs)  ; RECS: list of alists -> list of their "shortName" values
+  (map (lambda (rec) (assoc-ref rec "shortName")) recs))
+
+(define (get-species-meta2 recs)
+  "Pair each record's shortName with the result of mk-meta for that name."
+  (define (meta-entry rec)
+    (let ([name (assoc-ref rec "shortName")])
+      (cons name (mk-meta name))))
+  (map meta-entry recs))
+
+(define (get-species-links recs)
+  "Pair each record's shortName with the result of mk-data for that name."
+  (define (data-entry rec)
+    (let ([name (assoc-ref rec "shortName")])
+      (cons name (mk-data name))))
+  (map data-entry recs))
+
+(define (get-species-data)  ; all expanded species records, packed into a vector
+  (let ([species (get-expanded-species)]) (list->vector species)))
+
+(define (get-species-meta)  ; alist: info, doc, meta, data, up, meta-links, links
+  (let ([species (get-expanded-species)])  ; fetched once, used for both link lists
+    (list (cons "info" "Get information on species by visiting the data link or one of the individual links")
+          (cons "doc" (mk-doc "species"))
+          (cons "meta" (mk-meta "species"))
+          (cons "data" (mk-data "species"))
+          (cons "up" (string-append (prefix) "/"))
+          (cons "meta-links" (get-species-meta2 species))
+          (cons "links" (get-species-links species)))))
diff --git a/gn/db/sparql.scm b/gn/db/sparql.scm
index 95b4e1d..0e8c938 100644
--- a/gn/db/sparql.scm
+++ b/gn/db/sparql.scm
@@ -5,9 +5,11 @@
   #:use-module (ice-9 iconv)
   #:use-module (ice-9 receive)
   #:use-module (ice-9 string-fun)
-  #:use-module (gn cache memoize)
   #:use-module (web client)
+  #:use-module (web request)
   #:use-module (web uri)
+  #:use-module (gn cache memoize)
+  #:use-module (web gn-uri)
 
   #:export (memo-sparql-species
             memo-sparql-species-meta
@@ -15,9 +17,19 @@
             compile-species
             get-rows
             tsv->scm
+            strip-lang
             )
 )
 
+
+(define (strip-lang s)
+  "Strip quotes and a trailing language tag (@en, @de, ...) from RDF literal S.
+Input that is not of the form \"...\" or \"...\"@tag is returned unchanged."
+  (let* ([close (and (string-prefix? "\"" s) (string-rindex s #\"))]
+         [tail (and close (> close 0) (substring s (+ close 1)))])
+    ;; TAIL is what follows the closing quote: empty, or "@" plus a tag.
+    (if (and tail (or (string-null? tail) (string-prefix? "@" tail))) (substring s 1 close) s)))
+
 (define (gn-sparql-endpoint-url)
   "https://sparql.genenetwork.org/sparql")