aboutsummaryrefslogtreecommitdiff
path: root/gn3/api/metadata.py
blob: f21739ac709a913b6a1769587a0c85ae5ff3e368 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""API for fetching metadata using an API"""
from string import Template
from http.client import RemoteDisconnected
from urllib.error import URLError
from flask import Blueprint
from flask import jsonify
from flask import current_app

from SPARQLWrapper import SPARQLWrapper

from gn3.db.rdf import get_dataset_metadata
from gn3.db.rdf import get_publication_metadata
from gn3.db.rdf import get_phenotype_metadata
from gn3.db.rdf import get_genotype_metadata
from gn3.db.rdf import sparql_query
from gn3.db.rdf import RDF_PREFIXES


# Flask blueprint on which the metadata routes in this module are registered.
metadata = Blueprint("metadata", __name__)


@metadata.route("/dataset/<name>", methods=["GET"])
def dataset(name):
    """Fetch a dataset's metadata given its ACCESSION_ID.

    NAME is handed unchanged to `get_dataset_metadata` together with a
    SPARQL connection built from the app's "SPARQL_ENDPOINT" setting, and
    the `.data` of the result is returned as JSON.  An empty JSON object is
    returned instead when `RemoteDisconnected` or `URLError` is raised.
    """
    try:
        sparql_conn = SPARQLWrapper(current_app.config.get("SPARQL_ENDPOINT"))
        dataset_metadata = get_dataset_metadata(sparql_conn, name)
        return jsonify(dataset_metadata.data)
    except (RemoteDisconnected, URLError):
        # Either the virtuoso server is misconfigured or it is not running
        # at all; answer with an empty object rather than an error.
        return jsonify({})


@metadata.route("/publication/<name>", methods=["GET"])
def publication(name):
    """Fetch a publication's metadata given its ACCESSION_ID.

    NAME is prefixed with "gn:" when it contains the text "unpublished" and
    with "publication:" otherwise; the prefixed name is what gets passed to
    `get_publication_metadata`.  The `.data` of the result is returned as
    JSON.  An empty JSON object is returned instead when
    `RemoteDisconnected` or `URLError` is raised.
    """
    try:
        prefixed_name = (
            f"gn:{name}" if "unpublished" in name else f"publication:{name}"
        )
        sparql_conn = SPARQLWrapper(current_app.config.get("SPARQL_ENDPOINT"))
        publication_metadata = get_publication_metadata(
            sparql_conn, prefixed_name)
        return jsonify(publication_metadata.data)
    except (RemoteDisconnected, URLError):
        # Either the virtuoso server is misconfigured or it is not running
        # at all; answer with an empty object rather than an error.
        return jsonify({})


@metadata.route("/phenotype/<name>", methods=["GET"])
def phenotype(name):
    """Fetch a phenotype's metadata given its name.

    NAME is handed unchanged to `get_phenotype_metadata` together with a
    SPARQL connection built from the app's "SPARQL_ENDPOINT" setting, and
    the `.data` of the result is returned as JSON.  An empty JSON object is
    returned instead when `RemoteDisconnected` or `URLError` is raised.
    """
    try:
        sparql_conn = SPARQLWrapper(current_app.config.get("SPARQL_ENDPOINT"))
        phenotype_metadata = get_phenotype_metadata(sparql_conn, name)
        return jsonify(phenotype_metadata.data)
    except (RemoteDisconnected, URLError):
        # Either the virtuoso server is misconfigured or it is not running
        # at all; answer with an empty object rather than an error.
        return jsonify({})


@metadata.route("/genotype/<name>", methods=["GET"])
def genotype(name):
    """Fetch a genotype's metadata given its name.

    NAME is handed unchanged to `get_genotype_metadata` together with a
    SPARQL connection built from the app's "SPARQL_ENDPOINT" setting, and
    the `.data` of the result is returned as JSON.  An empty JSON object is
    returned instead when `RemoteDisconnected` or `URLError` is raised.
    """
    try:
        sparql_conn = SPARQLWrapper(current_app.config.get("SPARQL_ENDPOINT"))
        genotype_metadata = get_genotype_metadata(sparql_conn, name)
        return jsonify(genotype_metadata.data)
    except (RemoteDisconnected, URLError):
        # Either the virtuoso server is misconfigured or it is not running
        # at all; answer with an empty object rather than an error.
        return jsonify({})


# Characters that cannot appear raw (or that terminate the literal) inside a
# quoted SPARQL string, mapped to their backslash escape sequences.
_SPARQL_LITERAL_ESCAPES = str.maketrans({
    "\\": "\\\\",
    "'": "\\'",
    '"': '\\"',
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
})

# Query for the GN GeneWiki entries of a symbol.  `$symbol` is substituted
# between single quotes, so the value must be escaped beforehand.
_GN_GENEWIKI_QUERY = Template("""
$rdf_prefixes

SELECT ?author ?geneCategory (STR(?gnEntry) AS ?entry)
       (STR(?createdOn) AS ?created)
       (GROUP_CONCAT(DISTINCT ?pmid; SEPARATOR=',') AS ?PubMedId)
       ?weburl
WHERE {
  ?generif gn:symbol ?symbol .
  ?generif gn:geneWikiEntryOfGn _:gnEntry .
  _:gnEntry gn:geneWikiEntry ?gnEntry;
            dct:creator ?author;
            dct:created ?createdOn .
  OPTIONAL { _:gnEntry gn:geneCategory ?geneCategory } .
  OPTIONAL { _:gnEntry foaf:homepage ?weburl } .
  OPTIONAL { _:gnEntry dct:source ?pmid} .
  OPTIONAL {
    ?generif gn:wikiEntryOfSpecies ?speciesName .
    ?species gn:name ?speciesName ;
             gn:binomialName ?speciesBinomialName .
  } .
  FILTER( lcase(?symbol) = '$symbol' )
} GROUP BY ?author ?createdOn ?gnEntry
           ?generif ?symbol ?weburl
	   ?geneCategory
ORDER BY ASC(?createdOn)""")

# Query for the NCBI GeneRIF entries of a symbol.  `$symbol` is substituted
# between single quotes, so the value must be escaped beforehand.
_NCBI_GENEWIKI_QUERY = Template("""
$rdf_prefixes

SELECT ?speciesBinomialName (STR(?gnEntry) AS ?entry)
       (STR(?createdOn) AS ?createdOn)
       (GROUP_CONCAT(DISTINCT REPLACE(STR(?pmid), pubmed:, ''); SEPARATOR=',') AS ?PubMedId)
       ?generif
WHERE {
  ?generif gn:symbol ?symbol .
  ?generif gn:geneWikiEntryOfNCBI [
    gn:geneWikiEntry ?gnEntry ;
    dct:created ?createdOn ;
    dct:source ?pmid
  ] .
  OPTIONAL {
    ?generif gn:wikiEntryOfSpecies ?speciesName .
    ?species gn:name ?speciesName ;
             gn:binomialName ?speciesBinomialName .
  } .
  FILTER( lcase(?symbol) = '$symbol' )
} GROUP BY ?createdOn ?gnEntry
           ?generif ?symbol
	   ?speciesBinomialName
ORDER BY ASC(?createdOn)""")


def _escape_sparql_literal(value):
    """Return VALUE escaped for use inside a quoted SPARQL string literal."""
    return value.translate(_SPARQL_LITERAL_ESCAPES)


@metadata.route("/genewiki/<symbol>", methods=["GET"])
def get_genewiki_entries(symbol):
    """Fetch the GN and NCBI GeneRIF entries for SYMBOL.

    SYMBOL is lower-cased and compared against `lcase(?symbol)` in two
    SPARQL queries, each run through `sparql_query` on its own connection
    to the app's "SPARQL_ENDPOINT".

    Returns a JSON object with the keys "gn_entries" and "ncbi_entries",
    each holding the list of `.data` values of the corresponding query
    results.  When `RemoteDisconnected` or `URLError` is raised, both keys
    hold an empty object instead.
    """
    try:
        endpoint = current_app.config.get("SPARQL_ENDPOINT")
        # SYMBOL comes straight from the URL path and is spliced between
        # single quotes in the FILTER clause; escape it so that quotes,
        # backslashes or newlines cannot terminate the literal and alter
        # the query.
        safe_symbol = _escape_sparql_literal(str(symbol).lower())
        gn_entries = sparql_query(
            sparql_conn=SPARQLWrapper(endpoint),
            query=_GN_GENEWIKI_QUERY.substitute(
                rdf_prefixes=RDF_PREFIXES, symbol=safe_symbol))
        ncbi_entries = sparql_query(
            sparql_conn=SPARQLWrapper(endpoint),
            query=_NCBI_GENEWIKI_QUERY.substitute(
                rdf_prefixes=RDF_PREFIXES, symbol=safe_symbol))
        return jsonify({
            "gn_entries": [entry.data for entry in gn_entries],
            "ncbi_entries": [entry.data for entry in ncbi_entries],
        })
    except (RemoteDisconnected, URLError):
        # NOTE(review): the success path returns lists while this fallback
        # returns empty objects; the shape is kept as-is so existing
        # clients are not affected.
        return jsonify({
            "gn_entries": {},
            "ncbi_entries": {},
        })