From d04779286b49346946120a5801509ba80ee987ba Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Fri, 14 Apr 2023 15:37:31 +0300
Subject: Create new endpoint for fetching GeneRIF entries

* gn3/api/metadata.py: Import Template, sparql_query and RDF_PREFIXES.
(get_genewiki_entries): New endpoint.
* gn3/db/rdf.py: Add new constant for storing rdf prefixes.

Signed-off-by: Munyoki Kilyungi
---
 gn3/api/metadata.py | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 gn3/db/rdf.py       | 17 ++++++++++
 2 files changed, 106 insertions(+)

(limited to 'gn3')

diff --git a/gn3/api/metadata.py b/gn3/api/metadata.py
index f87743b..d542dc6 100644
--- a/gn3/api/metadata.py
+++ b/gn3/api/metadata.py
@@ -1,4 +1,5 @@
 """API for fetching metadata using an API"""
+from string import Template
 from http.client import RemoteDisconnected
 from urllib.error import URLError
 from flask import Blueprint
@@ -9,6 +10,8 @@ from SPARQLWrapper import SPARQLWrapper
 from SPARQLWrapper import SPARQLWrapper
 from gn3.db.rdf import get_dataset_metadata
 from gn3.db.rdf import get_trait_metadata
+from gn3.db.rdf import sparql_query
+from gn3.db.rdf import RDF_PREFIXES
 
 metadata = Blueprint("metadata", __name__)
 
@@ -42,3 +45,89 @@ def trait_metadata(dataset_name, trait_name):
     # The virtuoso server is misconfigured or it isn't running at all
     except (RemoteDisconnected, URLError):
         return jsonify({})
+
+
+@metadata.route("/genewiki/<symbol>", methods=["GET"])
+def get_genewiki_entries(symbol):
+    """Fetch the GN and NCBI GeneRIF entries for SYMBOL.
+
+    SYMBOL is lower-cased and matched against the lower-cased
+    gn:symbol of each entry.  The JSON response has two keys,
+    "gn_entries" and "ncbi_entries"; both map to an empty
+    object when the SPARQL request raises RemoteDisconnected
+    or URLError.
+    """
+    # SYMBOL comes straight from the URL and is spliced into a
+    # single-quoted SPARQL literal, so escape every character that
+    # could terminate or corrupt that literal.
+    safe_symbol = (str(symbol).lower()
+                   .replace("\\", "\\\\")
+                   .replace("'", "\\'")
+                   .replace("\n", "\\n")
+                   .replace("\r", "\\r"))
+    try:
+        gn_entries = sparql_query(
+            sparql_conn=SPARQLWrapper(current_app.config.get("SPARQL_ENDPOINT")),
+            query=Template("""
+$rdf_prefixes
+
+SELECT ?author ?geneCategory (STR(?gnEntry) AS ?entry)
+       (STR(?createdOn) AS ?created)
+       (GROUP_CONCAT(DISTINCT ?pmid; SEPARATOR=',') AS ?PubMedId)
+       ?weburl
+WHERE {
+  ?generif gn:symbol ?symbol .
+  ?generif gn:geneWikiEntryOfGn _:gnEntry .
+  _:gnEntry gn:geneWikiEntry ?gnEntry;
+            dct:creator ?author;
+            dct:created ?createdOn .
+  OPTIONAL { _:gnEntry gn:geneCategory ?geneCategory } .
+  OPTIONAL { _:gnEntry foaf:homepage ?weburl } .
+  OPTIONAL { _:gnEntry dct:source ?pmid} .
+  OPTIONAL {
+    ?generif gn:wikiEntryOfSpecies ?speciesName .
+    ?species gn:name ?speciesName ;
+             gn:binomialName ?speciesBinomialName .
+  } .
+  FILTER( lcase(?symbol) = '$symbol' )
+} GROUP BY ?author ?createdOn ?gnEntry
+           ?generif ?symbol ?weburl
+           ?geneCategory
+ORDER BY ASC(?createdOn)""").substitute(rdf_prefixes=RDF_PREFIXES,
+                                        symbol=safe_symbol))
+        ncbi_entries = sparql_query(
+            sparql_conn=SPARQLWrapper(current_app.config.get("SPARQL_ENDPOINT")),
+            query=Template("""
+$rdf_prefixes
+
+SELECT ?speciesBinomialName (STR(?gnEntry) AS ?entry)
+       (STR(?createdOn) AS ?createdOn)
+       (GROUP_CONCAT(DISTINCT REPLACE(STR(?pmid), pubmed:, ''); SEPARATOR=',') AS ?PubMedId)
+       ?generif
+WHERE {
+  ?generif gn:symbol ?symbol .
+  ?generif gn:geneWikiEntryOfNCBI [
+    gn:geneWikiEntry ?gnEntry ;
+    dct:created ?createdOn ;
+    dct:source ?pmid
+  ] .
+  OPTIONAL {
+    ?generif gn:wikiEntryOfSpecies ?speciesName .
+    ?species gn:name ?speciesName ;
+             gn:binomialName ?speciesBinomialName .
+  } .
+  FILTER( lcase(?symbol) = '$symbol' )
+} GROUP BY ?createdOn ?gnEntry
+           ?generif ?symbol
+           ?speciesBinomialName
+ORDER BY ASC(?createdOn)""").substitute(rdf_prefixes=RDF_PREFIXES,
+                                        symbol=safe_symbol))
+        return jsonify({
+            "gn_entries": list(map(lambda x: x.data, gn_entries)),
+            "ncbi_entries": list(map(lambda x: x.data, ncbi_entries)),
+        })
+    except (RemoteDisconnected, URLError):
+        return jsonify({
+            "gn_entries": {},
+            "ncbi_entries": {},
+        })
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
index deecefa..3e8d513 100644
--- a/gn3/db/rdf.py
+++ b/gn3/db/rdf.py
@@ -10,6 +10,23 @@ from pymonad.maybe import Just
 from pymonad.maybe import Just
 from gn3.monads import MonadicDict
 
+# NOTE(review): confirm the gn:, generif: and pubmed: namespace IRIs
+# against the data loaded in the triple store.
+RDF_PREFIXES = """PREFIX dct: <http://purl.org/dc/terms/>
+PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+PREFIX generif: <http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>
+PREFIX gn: <http://genenetwork.org/>
+PREFIX owl: <http://www.w3.org/2002/07/owl#>
+PREFIX pubmed: <http://rdf.ncbi.nlm.nih.gov/pubmed/>
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
+PREFIX up: <http://purl.uniprot.org/core/>
+PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+
+"""
+
+
 def sparql_query(
     sparql_conn: SPARQLWrapper, query: str
 ) -> Tuple[MonadicDict, ...]:
-- 
cgit v1.2.3