aboutsummaryrefslogtreecommitdiff
path: root/gn/db/sources/wikidata.scm
blob: 954ce93791a8a888de8c29b126fdf70b042e2cbb (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!

Wikidata queries, initially lifted over from the gn3 gene-alias code (that was written in Racket).

Note that any generated SPARQL query can be pasted into the Wikidata Query Service at
https://query.wikidata.org/. For example, generate a query at the REPL and copy the output into the query service:

scheme@(guile-user) [3]> (display (wikidata-query-geneids "Shh"))
```
SELECT DISTINCT ?wikidata_id
            WHERE {
              ?wikidata_id wdt:P31 wd:Q7187;
                           wdt:P703 ?species .
              VALUES (?species) { (wd:Q15978631 ) ( wd:Q83310 ) ( wd:Q184224 ) } .
              ?wikidata_id rdfs:label "Shh"@en .
              }
```

It is possible to run queries through curl with

```
curl -G https://query.wikidata.org/sparql -H "Accept: application/json; charset=utf-8" --data-urlencode query="
    SELECT DISTINCT ?alias
             WHERE {
                     wd:Q24420953 rdfs:label ?name ;
                         skos:altLabel ?alias .
                     FILTER(LANG(?name) = \"en\" && LANG(?alias) = \"en\").
                   }"
```
!#

;; Module declaration. Only the two SPARQL query builders are exported;
;; every other binding in this file stays private to the module.
(define-module (gn db sources wikidata)
  #:export (wikidata-query-geneids
            wikidata-query-gene-aliases))

;; Prefixed Wikidata identifiers used when assembling SPARQL text.
;; The binding names describe what each identifier stands for.

;; Properties (predicates).
(define wdt-instance-of "wdt:P31")
(define wdt-in-taxon "wdt:P703")
(define ps-encoded-by "ps:P702")   ; not referenced by the query builders visible in this section

;; Taxon items that gene lookups are restricted to.
(define wd-human "wd:Q15978631")
(define wd-mouse "wd:Q83310")
(define wd-rat "wd:Q184224")

;; Class item that a result must be an instance of.
(define wd-gene "wd:Q7187")

;; Sample gene item; the same Q-number appears in the examples in the file
;; header. Not referenced by the query builders visible in this section.
(define wd-shh-rat "wd:Q24420953")

(define (sparql-escape-string-literal text)
  "Return TEXT with backslash, double quote, newline and carriage return
replaced by their SPARQL escape sequences, so that the result can be placed
between double quotes in a query."
  ;; These four characters are exactly the ones that may not appear raw
  ;; inside a double-quoted SPARQL string literal.
  (apply string-append
         (map (lambda (ch)
                (cond ((char=? ch #\\) "\\\\")
                      ((char=? ch #\") "\\\"")
                      ((char=? ch #\newline) "\\n")
                      ((char=? ch #\return) "\\r")
                      (else (string ch))))
              (string->list text))))

(define (wikidata-query-geneids gene_name)
  "Build the SPARQL query text that selects the distinct wikidata identifiers
of gene items in the listed taxa whose English label equals GENE_NAME,
e.g. 'Shh'.

GENE_NAME is a string. It is escaped before being embedded in the query, so
a name containing quotes or backslashes cannot terminate the literal early.
Returns the query as a string; nothing is sent over the network here."
  (string-append
     "SELECT DISTINCT ?wikidata_id
            WHERE {
              ?wikidata_id " wdt-instance-of " " wd-gene ";
                           " wdt-in-taxon " ?species .
              VALUES (?species) { (" wd-human " ) ( " wd-mouse" ) ( " wd-rat" ) } .
              ?wikidata_id rdfs:label \"" (sparql-escape-string-literal gene_name) "\"@en .}"))

(define (wikidata-query-gene-aliases wikidata_id)
  "Build the SPARQL query text that lists the distinct English aliases
(skos:altLabel values, converted with STR) of the item WIKIDATA_ID.

WIKIDATA_ID is spliced into the query verbatim. It is expected in expanded
form with the angle brackets included, for example
<http://www.wikidata.org/entity/Q24420953>. Returns the query as a string."
  (let ((before-id "SELECT DISTINCT ?stripped_alias
             WHERE { ")
        (after-id " rdfs:label ?name ;
                         skos:altLabel ?alias .
                         BIND (STR(?alias)  AS ?stripped_alias) .
                     FILTER(LANG(?name) = \"en\" && LANG(?alias) = \"en\").}"))
    ;; The subject identifier sits between the two fixed query fragments.
    (string-append before-id wikidata_id after-id)))