about summary refs log tree commit diff
path: root/gn/db/sparql.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gn/db/sparql.scm')
-rw-r--r-- gn/db/sparql.scm 59
1 files changed, 52 insertions, 7 deletions
diff --git a/gn/db/sparql.scm b/gn/db/sparql.scm
index f03389b..bd7a306 100644
--- a/gn/db/sparql.scm
+++ b/gn/db/sparql.scm
@@ -8,18 +8,19 @@ the case.
 !#
 
 (define-module (gn db sparql)
-  #:use-module (json)
-  #:use-module (ice-9 match)
+  #:use-module (gn cache memoize)
+  #:use-module (gn db sources wikidata)
   #:use-module (ice-9 format)
   #:use-module (ice-9 iconv)
+  #:use-module (ice-9 match)
   #:use-module (ice-9 receive)
   #:use-module (ice-9 string-fun)
+  #:use-module (json)
+  #:use-module (srfi srfi-1)
   #:use-module (web client)
+  #:use-module (web gn-uri)
   #:use-module (web request)
   #:use-module (web uri)
-  #:use-module (gn cache memoize)
-  #:use-module (gn db sources wikidata)
-  #:use-module (web gn-uri)
 
   #:export (memo-sparql-species
             memo-sparql-species-meta
@@ -27,6 +28,8 @@ the case.
             sparql-groups-meta
             sparql-group-info
             memo-sparql-wd-species-info
+            memo-sparql-wd-gene-aliases
+            memo-sparql-wd-geneids
             compile-species
             compile-groups-meta
             get-rows
@@ -73,7 +76,9 @@ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
 
 (define (sparql-tsv endpoint-url query)
   "Execute raw SPARQL query returning response as a UTF8 string, e.g.
-(tsv->scm (sparql-tsv (wd-sparql-endpoint-url) \"wd:Q158695\"))
+(tsv->scm (sparql-tsv (wd-sparql-endpoint-url) \"wd:Q158695\")).
+
+Note this procedure works for wikidata, but not for gn!
 "
   ; GET /sparql?query=SELECT%20DISTINCT%20%2A%20where%20%7B%0A%20%20wd%3AQ158695%20wdt%3AP225%20%3Fo%20.%0A%7D%20limit%205 HTTP/2
   (receive (response-status response-body)
@@ -93,7 +98,9 @@ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
   (unpack "bindings" (unpack "results" response)))
 
 (define (sparql-scm endpoint-url query)
-  "Return dual S-exp 'resultset' of varnames and results"
+  "Run QUERY (after `gn-sparql-prefix') on ENDPOINT-URL, parse the JSON
+reply and return two values: the variable names and the results.
+Note this procedure works for GN, but does not yet work for wikidata."
   (let ((response (json-string->scm
                    (sparql-exec endpoint-url (gn-sparql-prefix query)))))
     (values (sparql-names response) (sparql-results response))))
@@ -161,9 +168,47 @@ SELECT DISTINCT ?taxon ?ncbi ?descr where {
 
 ")))
 
+(define (flatten lst)
+  "Return one flat list holding every non-null atom found in the
+nested list structure LST, in left-to-right order.  When LST is not a
+list at all the result is a one-element list containing it."
+  ;; Walk the tree right-to-left, consing each atom onto the part of the
+  ;; result that has already been built.
+  (let walk ((node lst) (tail '()))
+    (cond ((pair? node) (walk (car node) (walk (cdr node) tail)))
+          ((null? node) tail)
+          (else (cons node tail)))))
+
+(define (remove-quotes s)
+  "Return the string S without its enclosing pair of double quotes.
+S is returned unchanged when it is not wrapped in double quotes, which
+includes every string shorter than two characters, so unquoted values
+are no longer truncated and short strings no longer raise a range
+error."
+  (let ((len (string-length s)))
+    (if (and (>= len 2)
+             (char=? (string-ref s 0) #\")
+             (char=? (string-ref s (- len 1)) #\"))
+        (substring s 1 (- len 1))
+        s)))
+
+;; `sparql-wd-species-info' wrapped with `memoize'; listed in the module's #:export.
 (define memo-sparql-wd-species-info
   (memoize sparql-wd-species-info))
 
+(define (sparql-wd-geneids gene-name)
+  "Query the wikidata SPARQL endpoint for GENE-NAME and return the
+first element of every result row, i.e. a list of expanded wikidata ids
+such as (\"<http://www.wikidata.org/entity/Q14860079>\")."
+  (let ((query (wikidata-query-geneids gene-name)))
+    (receive (head rows) (tsv->scm (sparql-tsv (wd-sparql-endpoint-url) query))
+      (map car rows))))
+
+;; `sparql-wd-geneids' wrapped with `memoize'; listed in the module's #:export.
+(define memo-sparql-wd-geneids (memoize sparql-wd-geneids))
+
+(define (sparql-wd-gene-aliases geneids)
+  "Return a flattened and deduplicated list of the aliases wikidata
+returns for the ids in GENEIDS, each passed through `remove-quotes',
+e.g. (sparql-wd-gene-aliases '(\"Q14860079\" \"Q24420953\")).
+One SPARQL request is issued per gene id."
+  (define (aliases-of geneid)
+    ;; First element of each result row of the alias query for GENEID.
+    (receive (head rows)
+        (tsv->scm (sparql-tsv (wd-sparql-endpoint-url)
+                              (wikidata-query-gene-aliases geneid)))
+      (map car rows)))
+  ;; `flatten' merges the per-gene lists before the quotes are removed.
+  (delete-duplicates
+   (map remove-quotes (flatten (map aliases-of geneids)))))
+
+;; `sparql-wd-gene-aliases' wrapped with `memoize'; listed in the module's #:export.
+(define memo-sparql-wd-gene-aliases (memoize sparql-wd-gene-aliases))
+
+
 #!
 gn:Mus_musculus rdf:type gnc:species .
 gn:Mus_musculus gnt:name "Mouse" .