about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--gn/db/sources/wikidata.scm9
-rw-r--r--gn/db/sparql.scm42
2 files changed, 35 insertions, 16 deletions
diff --git a/gn/db/sources/wikidata.scm b/gn/db/sources/wikidata.scm
index fe495c5..954ce93 100644
--- a/gn/db/sources/wikidata.scm
+++ b/gn/db/sources/wikidata.scm
@@ -55,10 +55,11 @@ curl -G https://query.wikidata.org/sparql -H "Accept: application/json; charset=
               ?wikidata_id rdfs:label \"" gene_name "\"@en .}"))
 
 (define (wikidata-query-gene-aliases wikidata_id)
-  "SPARQL query to get a list of gene aliases based on a wikidata identifier, e.g. for Q24420953"
+  "SPARQL query to get a list of gene aliases based on a wikidata identifier, e.g. for Q24420953. This
+version supports the expanded id only, so <http://www.wikidata.org/entity/Q24420953> including the <,>."
   (string-append
-      "SELECT DISTINCT ?alias
-             WHERE {
-                     wd:" wikidata_id " rdfs:label ?name ;
+      "SELECT DISTINCT ?stripped_alias
+             WHERE { " wikidata_id " rdfs:label ?name ;
                          skos:altLabel ?alias .
+                         BIND (STR(?alias)  AS ?stripped_alias) .
                      FILTER(LANG(?name) = \"en\" && LANG(?alias) = \"en\").}"))
diff --git a/gn/db/sparql.scm b/gn/db/sparql.scm
index bc2bb29..c6c7cd8 100644
--- a/gn/db/sparql.scm
+++ b/gn/db/sparql.scm
@@ -8,18 +8,19 @@ the case.
 !#
 
 (define-module (gn db sparql)
-  #:use-module (json)
-  #:use-module (ice-9 match)
+  #:use-module (gn cache memoize)
+  #:use-module (gn db sources wikidata)
   #:use-module (ice-9 format)
   #:use-module (ice-9 iconv)
+  #:use-module (ice-9 match)
   #:use-module (ice-9 receive)
   #:use-module (ice-9 string-fun)
+  #:use-module (json)
+  #:use-module (srfi srfi-1)
   #:use-module (web client)
+  #:use-module (web gn-uri)
   #:use-module (web request)
   #:use-module (web uri)
-  #:use-module (gn cache memoize)
-  #:use-module (gn db sources wikidata)
-  #:use-module (web gn-uri)
 
   #:export (memo-sparql-species
             memo-sparql-species-meta
@@ -165,6 +166,14 @@ SELECT DISTINCT ?taxon ?ncbi ?descr where {
 
 ")))
 
+(define (flatten lst)  ; Collapse the arbitrarily nested list LST into one flat list of its leaves, in order.
+  (cond ((null? lst) '())  ; the empty list contributes no elements
+        ((pair? lst) (append (flatten (car lst)) (flatten (cdr lst))))  ; flatten head and tail separately, then join them
+        (else (list lst))))  ; any other value is a leaf: wrap it so append can splice it in
+
+(define (remove-quotes s)  ; Return string S without its first and last character.
+  (substring s 1 (- (string-length s) 1)))  ; NOTE(review): no check that S is actually quoted; an S shorter than 2 chars hands substring an invalid range -- confirm callers never pass one
+
 (define memo-sparql-wd-species-info
   (memoize sparql-wd-species-info))
 
@@ -173,7 +182,8 @@ SELECT DISTINCT ?taxon ?ncbi ?descr where {
 (\"<http://www.wikidata.org/entity/Q14860079>\" \"<http://www.wikidata.org/entity/Q24420953>\")"
   (receive (type values)
       (tsv->scm (sparql-tsv (wd-sparql-endpoint-url) (wikidata-query-geneids gene-name)))
-    (map (lambda (item) (car item)) values) ;; flatten list))
+    (map (lambda (item) (car item)) values) ;; flatten list
+    ))
 
 (define memo-sparql-wd-geneids
   (memoize sparql-wd-geneids))
@@ -182,12 +192,20 @@ SELECT DISTINCT ?taxon ?ncbi ?descr where {
   "Returns a flattened and dedpulicated list of geneids with
 (sparql-wd-gene-aliases '(\"Q14860079\" \"Q24420953\"))
 "
-  (map (lambda (geneid)
-         (receive (type values)
-             (tsv->scm (sparql-tsv (wd-sparql-endpoint-url) (wikidata-query-gene-aliases (pk geneid))))
-           (map (lambda (item) (car item)) values) ;; flatten list))
-      )
-  ) geneids))
+  (let* ([aliases
+         (map (lambda (geneid)
+                (receive (type values)
+                    (tsv->scm (sparql-tsv (wd-sparql-endpoint-url) (wikidata-query-gene-aliases (pk geneid))))
+                  (map (lambda (item) (car item)) values) ;; flatten list))
+                  )
+                ) geneids)]
+         [rm-quotes-aliases (map (lambda (s) (remove-quotes s)) (flatten aliases))]
+         )
+    (delete-duplicates rm-quotes-aliases)))
+
+(define memo-sparql-wd-gene-aliases  ; Variant of sparql-wd-gene-aliases obtained by passing it through memoize.
+  (memoize sparql-wd-gene-aliases))  ; presumably caches results per argument so repeated lookups skip the SPARQL request -- confirm in the memoize implementation
+
 
 #!
 gn:Mus_musculus rdf:type gnc:species .