aboutsummaryrefslogtreecommitdiff
path: root/gn
diff options
context:
space:
mode:
author Pjotr Prins 2023-08-18 15:41:28 +0200
committer Pjotr Prins 2023-08-18 15:41:28 +0200
commit afd3d25530401569a5953dc1c411f43a56ebd02c (patch)
tree 95188317effce4e1b90e898b1d4f582cb033c848 /gn
parent b9fb2c69136fe243fc9c6f7c3c4f8917814a401e (diff)
download gn-guile-afd3d25530401569a5953dc1c411f43a56ebd02c.tar.gz
Adding test infrastructure and reorganizing modules - still not happy with (web uri)
Diffstat (limited to 'gn')
-rw-r--r-- gn/data/species.scm 112
-rw-r--r-- gn/db/sparql.scm 14
2 files changed, 125 insertions, 1 deletions
diff --git a/gn/data/species.scm b/gn/data/species.scm
new file mode 100644
index 0000000..a1eb15f
--- /dev/null
+++ b/gn/data/species.scm
@@ -0,0 +1,112 @@
+(define-module (gn data species)
+ #:use-module (json)
+ #:use-module (ice-9 match)
+ #:use-module (ice-9 format)
+ #:use-module (ice-9 iconv)
+ #:use-module (ice-9 receive)
+ #:use-module (ice-9 string-fun)
+ #:use-module (gn db sparql)
+ #:use-module (web gn-uri)
+
+ #:export (
+ get-species-meta
+ get-species-data
+ get-species-shortnames
+ ))
+
+;; Fetch the species metadata and return it in digested form: the two
+;; values produced by `memo-sparql-species-meta' are turned into a table
+;; by `get-rows', compiled by `compile-species' starting from an empty
+;; record list, and finally passed through `species-digest'.
+(define (get-species)
+  (call-with-values
+      (lambda () (memo-sparql-species-meta))
+    (lambda (names res)
+      (species-digest
+       (compile-species '() (get-rows names res))))))
+
+;; RECS is a list of (key . fields) pairs, where every field is a list of
+;; at least two elements.  The key is dropped; each field becomes a pair
+;; of (url-parse-id of its first element) and its second element.  The
+;; result is a list with one such alist per record.
+(define (species-digest recs)
+  (define (field->pair field)
+    (cons (url-parse-id (car field)) (cadr field)))
+  ;; to keep the key use (cons (car rec) (map field->pair (cdr rec)))
+  (map (lambda (rec)
+         (map field->pair (cdr rec)))
+       recs))
+
+;; Enrich one species record with identifiers and links derived from its
+;; Wikidata entry.
+;;
+;; REC is an alist read through the keys "22-rdf-syntax-ns#isDefinedBy"
+;; and "shortName".  REC is returned unchanged when the Wikidata id
+;; parses to "unknown", or when the Wikidata lookup yields no first row
+;; of the form (taxonomy-name ncbi description).  Otherwise the derived
+;; entries are prepended to REC, so they shadow same-named keys in REC
+;; under `assoc-ref'.
+(define (expand-species rec)
+  (let ([wd-id (url-parse-id (assoc-ref rec "22-rdf-syntax-ns#isDefinedBy"))]
+        [short-name (normalize-id (assoc-ref rec "shortName"))])
+    (if (string=? wd-id "unknown")
+        rec
+        ;; wikidata query:
+        (receive (names rows) (tsv->scm (memo-sparql-wd-species-info wd-id))
+          ;; Only the first result row is used.  An empty or unexpectedly
+          ;; shaped result falls back to REC instead of raising, and no
+          ;; debug trace is printed.
+          (match rows
+            [((taxonomy-name ncbi descr) . _)
+             (let* ([ncbi-id (strip-lang ncbi)]
+                    [taxonomy (strip-lang taxonomy-name)]
+                    ;; Wikispecies page names use "_" in place of spaces.
+                    [taxonomy-lnk (string-replace-substring taxonomy " " "_")])
+               (append
+                `(("id" . ,short-name)
+                  ("wikidata" . ,wd-id)
+                  ("taxonomy-id" . ,ncbi-id)
+                  ("ncbi-url" . ,(string-append "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=" ncbi-id))
+                  ("uniprot-url" . ,(string-append "https://www.uniprot.org/taxonomy/" ncbi-id))
+                  ("wikidata-url" . ,(string-append "http://www.wikidata.org/entity/" wd-id))
+                  ("wikispecies-url" . ,(string-append "https://species.wikimedia.org/wiki/" taxonomy-lnk))
+                  ("taxonomy-name" . ,taxonomy)
+                  ("meta" . ,(mk-meta short-name))
+                  ("description" . ,(strip-lang descr)))
+                rec))]
+            [_ rec])))))
+
+
+
+(define (get-expanded-species)
+  "Return every record from `get-species' after `expand-species' has been applied to it."
+  (map expand-species (get-species)))
+
+(define (get-expanded-species1 short-name)
+  "Return the expanded record of the first species whose \"shortName\" equals SHORT-NAME, or #f when none matches."
+  (let loop ([recs (get-species)])
+    (if (null? recs)
+        #f
+        (let* ([rec (car recs)]
+               [name (assoc-ref rec "shortName")])
+          ;; `assoc-ref' yields #f for a record without a "shortName"
+          ;; entry; such a record is skipped rather than handed to
+          ;; `string=?', which would raise a wrong-type error.
+          (if (and (string? name) (string=? name short-name))
+              (expand-species rec)
+              (loop (cdr recs)))))))
+
+(define (get-species-api-str)
+  "Serialize the two hard-coded species API URLs with `scm->json-string'."
+  (let ([endpoints (vector "https://genenetwork.org/api/v2/mouse/"
+                           "https://genenetwork.org/api/v2/rat/")])
+    (scm->json-string endpoints)))
+
+(define (get-species-shortnames recs)
+  "Return the \"shortName\" value of every record in RECS (#f for a record without one)."
+  (map (lambda (rec)
+         (assoc-ref rec "shortName"))
+       recs))
+
+(define (get-species-meta2 recs)
+  "Return an alist pairing each record's \"shortName\" with the result of `mk-meta' on that name."
+  (define (meta-entry rec)
+    (let ([shortname (assoc-ref rec "shortName")])
+      (cons shortname (mk-meta shortname))))
+  (map meta-entry recs))
+
+(define (get-species-links recs)
+  "Return an alist pairing each record's \"shortName\" with the result of `mk-data' on that name."
+  (define (data-entry rec)
+    (let ([shortname (assoc-ref rec "shortName")])
+      (cons shortname (mk-data shortname))))
+  (map data-entry recs))
+
+(define (get-species-data)
+  "Return the expanded species records as a vector."
+  (let ([expanded (get-expanded-species)])
+    (list->vector expanded)))
+
+(define (get-species-meta)
+  "Return the alist describing the species endpoint: an info text, doc/meta/data/up links, and per-species meta and data links."
+  (let ([recs (get-expanded-species)])
+    (list
+     (cons "info" "Get information on species by visiting the data link or one of the individual links")
+     (cons "doc" (mk-doc "species"))
+     (cons "meta" (mk-meta "species"))
+     (cons "data" (mk-data "species"))
+     (cons "up" (string-append (prefix) "/"))
+     (cons "meta-links" (get-species-meta2 recs))
+     (cons "links" (get-species-links recs)))))
diff --git a/gn/db/sparql.scm b/gn/db/sparql.scm
index 95b4e1d..0e8c938 100644
--- a/gn/db/sparql.scm
+++ b/gn/db/sparql.scm
@@ -5,9 +5,11 @@
#:use-module (ice-9 iconv)
#:use-module (ice-9 receive)
#:use-module (ice-9 string-fun)
- #:use-module (gn cache memoize)
#:use-module (web client)
+ #:use-module (web request)
#:use-module (web uri)
+ #:use-module (gn cache memoize)
+ #:use-module (web gn-uri)
#:export (memo-sparql-species
memo-sparql-species-meta
@@ -15,9 +17,19 @@
compile-species
get-rows
tsv->scm
+ strip-lang
)
)
+
+(define (strip-lang s)
+  "Strip the surrounding quotes and an optional language tag (@en, @fr, @pt-BR, ...) from the RDF entry S."
+  ;; A language tag is "@" followed by at least one letter, digit or hyphen.
+  (define (lang-tag? str)
+    (and (> (string-length str) 1)
+         (char=? (string-ref str 0) #\@)
+         (string-every (lambda (c)
+                         (or (char-alphabetic? c)
+                             (char-numeric? c)
+                             (char=? c #\-)))
+                       str 1)))
+  ;; CLOSE is the index of the last double quote, provided S starts with one.
+  (let ([close (and (string-prefix? "\"" s)
+                    (string-rindex s #\"))])
+    (if (and close
+             ;; the closing quote must not be the opening one
+             (> close 0)
+             (let ([tail (substring s (+ close 1))])
+               (or (string-null? tail) (lang-tag? tail))))
+        (substring s 1 close)
+        ;; anything else (unquoted, lone quote, unknown suffix) is
+        ;; returned unchanged
+        s)))
+
(define (gn-sparql-endpoint-url)
"https://sparql.genenetwork.org/sparql")