diff options
-rw-r--r-- | gn/data/strains.scm | 2 | ||||
-rw-r--r-- | gn/db/sources/wikidata.scm | 49 | ||||
-rw-r--r-- | gn/db/sparql.scm | 26 | ||||
-rw-r--r-- | web/webserver.scm | 7 |
4 files changed, 74 insertions, 10 deletions
diff --git a/gn/data/strains.scm b/gn/data/strains.scm index c560d9b..07b69ff 100644 --- a/gn/data/strains.scm +++ b/gn/data/strains.scm @@ -25,7 +25,7 @@ "Return assoc list of tuples of strain id+names: ((4 . BXD1) (5 . BXD2) (6 . BXD5) (7 . BXD6)... -used-for-mapping? will say whether the strains/individuals are used for mapping. +optional key used-for-mapping? will say whether the strains/individuals are used for mapping. " (call-with-db (lambda (db) diff --git a/gn/db/sources/wikidata.scm b/gn/db/sources/wikidata.scm index 7397426..fe495c5 100644 --- a/gn/db/sources/wikidata.scm +++ b/gn/db/sources/wikidata.scm @@ -1,10 +1,38 @@ #! -Wikidata queries +Wikidata queries, initially lifted over from the gn3 gene-alias code (that was written in Racket). +Note you can take a SPARQL query and push it into https://query.wikidata.org/. E.g. generate a query and +copy paste into the query service: + +scheme@(guile-user) [3]> (display (wikidata-query-geneids "Shh")) +``` +SELECT DISTINCT ?wikidata_id + WHERE { + ?wikidata_id wdt:P31 wd:Q7187; + wdt:P703 ?species . + VALUES (?species) { (wd:Q15978631 ) ( wd:Q83310 ) ( wd:Q184224 ) } . + ?wikidata_id rdfs:label "Shh"@en . + } +``` + +It is possible to run queries through curl with + +``` +curl -G https://query.wikidata.org/sparql -H "Accept: application/json; charset=utf-8" --data-urlencode query=" + SELECT DISTINCT ?alias + WHERE { + wd:Q24420953 rdfs:label ?name ; + skos:altLabel ?alias . + FILTER(LANG(?name) = \"en\" && LANG(?alias) = \"en\"). + }" +``` !# (define-module (gn db sources wikidata) + #:export (wikidata-query-geneids + wikidata-query-gene-aliases + ) ) (define ps-encoded-by "ps:P702") @@ -14,16 +42,23 @@ Wikidata queries (define wd-mouse "wd:Q83310") (define wd-rat "wd:Q184224") (define wd-gene "wd:Q7187") +(define wd-shh-rat "wd:Q24420953") -(define (wikidata_query_geneids gene_name) - "Return the wikidata identifiers pointing to genes of listed species" +(define (wikidata-query-geneids gene_name) + "SPARQL query to get the wikidata identifiers pointing to genes of listed species, e.g. 'Shh'" (string-append "SELECT DISTINCT ?wikidata_id WHERE { ?wikidata_id " wdt-instance-of " " wd-gene "; " wdt-in-taxon " ?species . VALUES (?species) { (" wd-human " ) ( " wd-mouse" ) ( " wd-rat" ) } . - ?wikidata_id rdfs:label \"" gene_name "\"@en . - } -" - )) + ?wikidata_id rdfs:label \"" gene_name "\"@en .}")) + +(define (wikidata-query-gene-aliases wikidata_id) + "SPARQL query to get a list of gene aliases based on a wikidata identifier, e.g. for Q24420953" + (string-append + "SELECT DISTINCT ?alias + WHERE { + wd:" wikidata_id " rdfs:label ?name ; + skos:altLabel ?alias . + FILTER(LANG(?name) = \"en\" && LANG(?alias) = \"en\").}")) diff --git a/gn/db/sparql.scm b/gn/db/sparql.scm index f03389b..86d163f 100644 --- a/gn/db/sparql.scm +++ b/gn/db/sparql.scm @@ -73,7 +73,9 @@ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> (define (sparql-tsv endpoint-url query) "Execute raw SPARQL query returning response as a UTF8 string, e.g. -(tsv->scm (sparql-tsv (wd-sparql-endpoint-url) \"wd:Q158695\")) +(tsv->scm (sparql-tsv (wd-sparql-endpoint-url) \"wd:Q158695\")). + +Note this procedure works for wikidata, but not for gn! " ; GET /sparql?query=SELECT%20DISTINCT%20%2A%20where%20%7B%0A%20%20wd%3AQ158695%20wdt%3AP225%20%3Fo%20.%0A%7D%20limit%205 HTTP/2 (receive (response-status response-body) @@ -93,7 +95,9 @@ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> (unpack "bindings" (unpack "results" response))) (define (sparql-scm endpoint-url query) - "Return dual S-exp 'resultset' of varnames and results" + "Return dual S-exp 'resultset' of varnames and results. + +Note this procedure works for GN, but does not yet work for wikidata" (let ((response (json-string->scm (sparql-exec endpoint-url (gn-sparql-prefix query))))) (values (sparql-names response) (sparql-results response)))) @@ -164,6 +168,24 @@ SELECT DISTINCT ?taxon ?ncbi ?descr where { (define memo-sparql-wd-species-info (memoize sparql-wd-species-info)) +(define (sparql-wd-geneids gene-name) + "Return a list of expanded wikidata ids, e.g. +(\"<http://www.wikidata.org/entity/Q14860079>\" \"<http://www.wikidata.org/entity/Q24420953>\")" + (receive (type values) + (tsv->scm (sparql-tsv (wd-sparql-endpoint-url) (wikidata-query-geneids gene-name))) + (map (lambda (item) (car item)) values) ;; flatten list)) + +(define memo-sparql-wd-geneids + (memoize sparql-wd-geneids)) + +(define (sparql-wd-gene-aliases geneids) + (let ([geneid (car geneids)]) + (receive (type values) + (tsv->scm (sparql-tsv (wd-sparql-endpoint-url) (wikidata-query-gene-aliases geneid))) + (map (lambda (item) (car item)) values) ;; flatten list)) + ) + )) + #! gn:Mus_musculus rdf:type gnc:species . gn:Mus_musculus gnt:name "Mouse" . diff --git a/web/webserver.scm b/web/webserver.scm index d2a8c8d..430529b 100644 --- a/web/webserver.scm +++ b/web/webserver.scm @@ -56,6 +56,11 @@ otherwise search for set/group data" (if taxoninfo taxoninfo (cdr (get-group-data id))))) +(define (get-gene-aliases genename) + "Return a vector of aliases for genename." + #("Hx") + ) + (define (not-found2 request) (values (build-response #:code 404) (string-append "Resource X not found: " @@ -247,6 +252,8 @@ otherwise search for set/group data" (('GET "doc" path ... page) ;; serve documents from /doc/ (render-doc path page)) + (('GET "gene" "aliases" genename) + (render-json (get-gene-aliases genename))) (('GET "species.json") (render-json (get-species-data))) (('GET "species.meta.json") |