From afd3d25530401569a5953dc1c411f43a56ebd02c Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 18 Aug 2023 15:41:28 +0200
Subject: Adding test infrastructure and reorganizing modules - still not happy with (web uri)

---
 gn/data/species.scm | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 gn/db/sparql.scm    |  14 ++++++-
 2 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 gn/data/species.scm

(limited to 'gn')

diff --git a/gn/data/species.scm b/gn/data/species.scm
new file mode 100644
index 0000000..a1eb15f
--- /dev/null
+++ b/gn/data/species.scm
@@ -0,0 +1,112 @@
+(define-module (gn data species)
+  #:use-module (json)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 format)
+  #:use-module (ice-9 iconv)
+  #:use-module (ice-9 receive)
+  #:use-module (ice-9 string-fun)
+  #:use-module (gn db sparql)
+  #:use-module (web gn-uri)
+
+  #:export (
+            get-species-meta
+            get-species-data
+            get-species-shortnames
+            ))
+
+(define (get-species)
+  "Return the digested species records built from `memo-sparql-species-meta'."
+  (receive (names res) (memo-sparql-species-meta)
+    (let* ([table (get-rows names res)]
+           [recs '()]
+           [h (compile-species recs table)])
+      (species-digest h))))
+
+;; RECS is a list of pairs (key . entries), each entry being a list.  Returns a
+;; list (not a vector) with one alist per record, pairing `url-parse-id' of an
+;; entry's first item with its second item.  The key is dropped.
+(define (species-digest recs)
+  (map (lambda (r)
+         (map (lambda (i) (cons (url-parse-id (car i)) (cadr i)))
+              (cdr r)))
+       recs))
+
+(define (expand-species rec)
+  "Return REC extended with identifiers, names and URLs taken from Wikidata.
+REC is returned unchanged when its Wikidata id is \"unknown\" or when the
+query yields no (taxonomy-name ncbi descr) row."
+  (let ([wd-id (url-parse-id (assoc-ref rec "22-rdf-syntax-ns#isDefinedBy"))]
+        [short-name (normalize-id (assoc-ref rec "shortName"))])
+    (if (string=? wd-id "unknown")
+        rec
+        ;; wikidata query: only the first result row is used
+        (receive (names rows) (tsv->scm (memo-sparql-wd-species-info wd-id))
+          (match rows
+            (((taxonomy-name ncbi descr) . _)
+             (let* ([ncbi-id (strip-lang ncbi)]
+                    [taxonomy (strip-lang taxonomy-name)]
+                    [taxonomy-lnk (string-replace-substring taxonomy " " "_")])
+               `(("id" . ,short-name)
+                 ("wikidata" . ,wd-id)
+                 ("taxonomy-id" . ,ncbi-id)
+                 ("ncbi-url" . ,(string-append "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=" ncbi-id))
+                 ("uniprot-url" . ,(string-append "https://www.uniprot.org/taxonomy/" ncbi-id))
+                 ("wikidata-url" . ,(string-append "http://www.wikidata.org/entity/" wd-id))
+                 ("wikispecies-url" . ,(string-append "https://species.wikimedia.org/wiki/" taxonomy-lnk))
+                 ("taxonomy-name" . ,taxonomy)
+                 ("meta" . ,(mk-meta short-name))
+                 ("description" . ,(strip-lang descr))
+                 ,@rec)))
+            (_ rec))))))
+
+(define (get-expanded-species)
+  "Return every record of `get-species' passed through `expand-species'."
+  (map expand-species (get-species)))
+
+(define (get-expanded-species1 short-name)
+  "Return the expanded record of the first species whose \"shortName\" equals
+SHORT-NAME, or #f when no species matches."
+  (let loop ([recs (get-species)])
+    (cond
+     [(null? recs) #f]
+     [(equal? (assoc-ref (car recs) "shortName") short-name)
+      (expand-species (car recs))]
+     [else (loop (cdr recs))])))
+
+(define (get-species-api-str)
+  "Return the hard-coded species API entry points as a JSON array string."
+  (scm->json-string #("https://genenetwork.org/api/v2/mouse/"
+                      "https://genenetwork.org/api/v2/rat/")))
+
+(define (get-species-shortnames recs)
+  "Return the \"shortName\" value of every record in RECS."
+  (map (lambda (rec) (assoc-ref rec "shortName")) recs))
+
+(define (get-species-meta2 recs)
+  "Return an alist mapping the short name of each record in RECS to its `mk-meta' URI."
+  (map (lambda (rec)
+         (let ([shortname (assoc-ref rec "shortName")])
+           (cons shortname (mk-meta shortname))))
+       recs))
+
+(define (get-species-links recs)
+  "Return an alist mapping the short name of each record in RECS to its `mk-data' URI."
+  (map (lambda (rec)
+         (let ([shortname (assoc-ref rec "shortName")])
+           (cons shortname (mk-data shortname))))
+       recs))
+
+(define (get-species-data)
+  "Return all expanded species records as a vector."
+  (list->vector (get-expanded-species)))
+
+(define (get-species-meta)
+  "Return the alist for the species entry point: info, doc, meta, data, up and per-species links."
+  (let ([recs (get-expanded-species)])
+    `(("info" . "Get information on species by visiting the data link or one of the individual links")
+      ("doc" . ,(mk-doc "species"))
+      ("meta" . ,(mk-meta "species"))
+      ("data" . ,(mk-data "species"))
+      ("up" . ,(string-append (prefix) "/"))
+      ("meta-links" . ,(get-species-meta2 recs))
+      ("links" . ,(get-species-links recs)))))
diff --git a/gn/db/sparql.scm b/gn/db/sparql.scm
index 95b4e1d..0e8c938 100644
--- a/gn/db/sparql.scm
+++ b/gn/db/sparql.scm
@@ -5,9 +5,11 @@
   #:use-module (ice-9 iconv)
   #:use-module (ice-9 receive)
   #:use-module (ice-9 string-fun)
-  #:use-module (gn cache memoize)
   #:use-module (web client)
+  #:use-module (web request)
   #:use-module (web uri)
+  #:use-module (gn cache memoize)
+  #:use-module (web gn-uri)
 
   #:export (memo-sparql-species
             memo-sparql-species-meta
@@ -15,9 +17,19 @@
             compile-species
             get-rows
             tsv->scm
+            strip-lang
             )
   )
 
+
+(define (strip-lang s)
+  "Strip quotes and language tag (@en) from RDF entries"
+  ;; "foo" and "foo"@en both yield foo; any other string is returned as is
+  (list->string (match (string->list s)
+                  [(#\" rest ... #\") rest]
+                  [(#\" rest ... #\" #\@ #\e #\n) rest]
+                  [rest rest])))
+
 (define (gn-sparql-endpoint-url)
   "https://sparql.genenetwork.org/sparql")
 
-- 
cgit v1.2.3