diff options
author | Munyoki Kilyungi | 2023-04-14 15:37:31 +0300 |
---|---|---|
committer | BonfaceKilz | 2023-04-17 15:24:36 +0300 |
commit | d04779286b49346946120a5801509ba80ee987ba (patch) | |
tree | e3ff261fbb2b3b906a489a6d571c4d56fc19c724 | |
parent | 8af8105444522c2c71b5ddd36a550e964cddffbf (diff) | |
download | genenetwork3-d04779286b49346946120a5801509ba80ee987ba.tar.gz |
Create new endpoint for fetching GeneRIF entries
* gn3/api/metadata.py: Import Template, sparql_query and RDF_PREFIXES.
(get_genewiki_entries): New endpoint.
* gn3/db/rdf.py: Add new constant for storing rdf prefixes.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rw-r--r-- | gn3/api/metadata.py | 74 | ||||
-rw-r--r-- | gn3/db/rdf.py | 15 |
2 files changed, 89 insertions, 0 deletions
```diff
diff --git a/gn3/api/metadata.py b/gn3/api/metadata.py
index f87743b..d542dc6 100644
--- a/gn3/api/metadata.py
+++ b/gn3/api/metadata.py
@@ -1,4 +1,5 @@
 """API for fetching metadata using an API"""
+from string import Template
 from http.client import RemoteDisconnected
 from urllib.error import URLError
 from flask import Blueprint
@@ -9,6 +10,8 @@ from SPARQLWrapper import SPARQLWrapper
 
 from gn3.db.rdf import get_dataset_metadata
 from gn3.db.rdf import get_trait_metadata
+from gn3.db.rdf import sparql_query
+from gn3.db.rdf import RDF_PREFIXES
 
 
 metadata = Blueprint("metadata", __name__)
@@ -42,3 +45,74 @@ def trait_metadata(dataset_name, trait_name):
     # The virtuoso server is misconfigured or it isn't running at all
     except (RemoteDisconnected, URLError):
         return jsonify({})
+
+
+@metadata.route("/genewiki/<symbol>", methods=["GET"])
+def get_genewiki_entries(symbol):
+    """Fetch the GN and NCBI GeneRIF entries"""
+    try:
+        gn_entries = sparql_query(
+            sparql_conn=SPARQLWrapper(current_app.config.get("SPARQL_ENDPOINT")),
+            query=Template("""
+$rdf_prefixes
+
+SELECT ?author ?geneCategory (STR(?gnEntry) AS ?entry)
+       (STR(?createdOn) AS ?created)
+       (GROUP_CONCAT(DISTINCT ?pmid; SEPARATOR=',') AS ?PubMedId)
+       ?weburl
+WHERE {
+  ?generif gn:symbol ?symbol .
+  ?generif gn:geneWikiEntryOfGn _:gnEntry .
+  _:gnEntry gn:geneWikiEntry ?gnEntry;
+            dct:creator ?author;
+            dct:created ?createdOn .
+  OPTIONAL { _:gnEntry gn:geneCategory ?geneCategory } .
+  OPTIONAL { _:gnEntry foaf:homepage ?weburl } .
+  OPTIONAL { _:gnEntry dct:source ?pmid} .
+  OPTIONAL {
+    ?generif gn:wikiEntryOfSpecies ?speciesName .
+    ?species gn:name ?speciesName ;
+             gn:binomialName ?speciesBinomialName .
+  } .
+  FILTER( lcase(?symbol) = '$symbol' )
+} GROUP BY ?author ?createdOn ?gnEntry
+           ?generif ?symbol ?weburl
+           ?geneCategory
+ORDER BY ASC(?createdOn)""").substitute(rdf_prefixes=RDF_PREFIXES,
+                                        symbol=str(symbol).lower()))
+        ncbi_entries = sparql_query(
+            sparql_conn=SPARQLWrapper(current_app.config.get("SPARQL_ENDPOINT")),
+            query=Template("""
+$rdf_prefixes
+
+SELECT ?speciesBinomialName (STR(?gnEntry) AS ?entry)
+       (STR(?createdOn) AS ?createdOn)
+       (GROUP_CONCAT(DISTINCT REPLACE(STR(?pmid), pubmed:, ''); SEPARATOR=',') AS ?PubMedId)
+       ?generif
+WHERE {
+  ?generif gn:symbol ?symbol .
+  ?generif gn:geneWikiEntryOfNCBI [
+    gn:geneWikiEntry ?gnEntry ;
+    dct:created ?createdOn ;
+    dct:source ?pmid
+  ] .
+  OPTIONAL {
+    ?generif gn:wikiEntryOfSpecies ?speciesName .
+    ?species gn:name ?speciesName ;
+             gn:binomialName ?speciesBinomialName .
+  } .
+  FILTER( lcase(?symbol) = '$symbol' )
+} GROUP BY ?createdOn ?gnEntry
+           ?generif ?symbol
+           ?speciesBinomialName
+ORDER BY ASC(?createdOn)""").substitute(rdf_prefixes=RDF_PREFIXES,
+                                        symbol=str(symbol).lower()))
+        return jsonify({
+            "gn_entries": list(map(lambda x: x.data, gn_entries)),
+            "ncbi_entries": list(map(lambda x: x.data, ncbi_entries)),
+        })
+    except (RemoteDisconnected, URLError):
+        return jsonify({
+            "gn_entries": {},
+            "ncbi_entries": {},
+        })
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
index deecefa..3e8d513 100644
--- a/gn3/db/rdf.py
+++ b/gn3/db/rdf.py
@@ -10,6 +10,21 @@ from pymonad.maybe import Just
 from gn3.monads import MonadicDict
 
 
+RDF_PREFIXES = """PREFIX dct: <http://purl.org/dc/terms/>
+PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+PREFIX generif: <http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>
+PREFIX gn: <http://genenetwork.org/>
+PREFIX owl: <http://www.w3.org/2002/07/owl#>
+PREFIX pubmed: <http://rdf.ncbi.nlm.nih.gov/pubmed/>
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
+PREFIX up: <http://purl.uniprot.org/core/>
+PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+
+"""
+
+
 def sparql_query(
         sparql_conn: SPARQLWrapper, query: str
 ) -> Tuple[MonadicDict, ...]:
```