aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMunyoki Kilyungi2023-04-14 15:37:31 +0300
committerBonfaceKilz2023-04-17 15:24:36 +0300
commitd04779286b49346946120a5801509ba80ee987ba (patch)
treee3ff261fbb2b3b906a489a6d571c4d56fc19c724
parent8af8105444522c2c71b5ddd36a550e964cddffbf (diff)
downloadgenenetwork3-d04779286b49346946120a5801509ba80ee987ba.tar.gz
Create new endpoint for fetching GeneRIF entries
* gn3/api/metadata.py: Import Template, sparql_query and RDF_PREFIXES. (get_genewiki_entries): New endpoint. * gn3/db/rdf.py: Add new constant for storing rdf prefixes. Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rw-r--r--gn3/api/metadata.py74
-rw-r--r--gn3/db/rdf.py15
2 files changed, 89 insertions, 0 deletions
diff --git a/gn3/api/metadata.py b/gn3/api/metadata.py
index f87743b..d542dc6 100644
--- a/gn3/api/metadata.py
+++ b/gn3/api/metadata.py
@@ -1,4 +1,5 @@
"""API for fetching metadata using an API"""
+from string import Template
from http.client import RemoteDisconnected
from urllib.error import URLError
from flask import Blueprint
@@ -9,6 +10,8 @@ from SPARQLWrapper import SPARQLWrapper
from gn3.db.rdf import get_dataset_metadata
from gn3.db.rdf import get_trait_metadata
+from gn3.db.rdf import sparql_query
+from gn3.db.rdf import RDF_PREFIXES
metadata = Blueprint("metadata", __name__)
@@ -42,3 +45,74 @@ def trait_metadata(dataset_name, trait_name):
# The virtuoso server is misconfigured or it isn't running at all
except (RemoteDisconnected, URLError):
return jsonify({})
+
+
+def _escape_sparql_literal(value: str) -> str:
+    """Escape VALUE for use inside a single-quoted SPARQL string literal."""
+    # Backslashes are handled first so that the escapes added for the
+    # other characters are not themselves doubled.
+    return (str(value)
+            .replace("\\", "\\\\")
+            .replace("'", "\\'")
+            .replace("\n", "\\n")
+            .replace("\r", "\\r"))
+
+
+@metadata.route("/genewiki/<symbol>", methods=["GET"])
+def get_genewiki_entries(symbol):
+    """Fetch the GN and NCBI GeneRIF entries for SYMBOL.
+
+    Two SPARQL queries are sent to the endpoint configured under
+    ``SPARQL_ENDPOINT``; both match SYMBOL case-insensitively by
+    comparing ``lcase(?symbol)`` with the lower-cased SYMBOL.
+
+    Returns a JSON object with the keys ``gn_entries`` and
+    ``ncbi_entries``, each holding the list of rows from its query.
+    If the endpoint cannot be reached (``RemoteDisconnected`` or
+    ``URLError``), both keys hold an empty object instead.
+    """
+    # SYMBOL comes straight from the URL and is placed inside a quoted
+    # SPARQL literal, so quotes, backslashes and line breaks must be
+    # escaped to stop a crafted symbol from altering the query.
+    substitutions = {
+        "rdf_prefixes": RDF_PREFIXES,
+        "symbol": _escape_sparql_literal(str(symbol).lower()),
+    }
+    gn_query = Template("""
+$rdf_prefixes
+
+SELECT ?author ?geneCategory (STR(?gnEntry) AS ?entry)
+ (STR(?createdOn) AS ?created)
+ (GROUP_CONCAT(DISTINCT ?pmid; SEPARATOR=',') AS ?PubMedId)
+ ?weburl
+WHERE {
+ ?generif gn:symbol ?symbol .
+ ?generif gn:geneWikiEntryOfGn _:gnEntry .
+ _:gnEntry gn:geneWikiEntry ?gnEntry;
+ dct:creator ?author;
+ dct:created ?createdOn .
+ OPTIONAL { _:gnEntry gn:geneCategory ?geneCategory } .
+ OPTIONAL { _:gnEntry foaf:homepage ?weburl } .
+ OPTIONAL { _:gnEntry dct:source ?pmid} .
+ OPTIONAL {
+ ?generif gn:wikiEntryOfSpecies ?speciesName .
+ ?species gn:name ?speciesName ;
+ gn:binomialName ?speciesBinomialName .
+ } .
+ FILTER( lcase(?symbol) = '$symbol' )
+} GROUP BY ?author ?createdOn ?gnEntry
+ ?generif ?symbol ?weburl
+ ?geneCategory
+ORDER BY ASC(?createdOn)""").substitute(substitutions)
+    # NOTE(review): `pubmed:` is handed to REPLACE as a bare prefixed
+    # name rather than a string pattern -- confirm the endpoint accepts
+    # this and strips the PubMed namespace as intended.
+    ncbi_query = Template("""
+$rdf_prefixes
+
+SELECT ?speciesBinomialName (STR(?gnEntry) AS ?entry)
+ (STR(?createdOn) AS ?createdOn)
+ (GROUP_CONCAT(DISTINCT REPLACE(STR(?pmid), pubmed:, ''); SEPARATOR=',') AS ?PubMedId)
+ ?generif
+WHERE {
+ ?generif gn:symbol ?symbol .
+ ?generif gn:geneWikiEntryOfNCBI [
+ gn:geneWikiEntry ?gnEntry ;
+ dct:created ?createdOn ;
+ dct:source ?pmid
+ ] .
+ OPTIONAL {
+ ?generif gn:wikiEntryOfSpecies ?speciesName .
+ ?species gn:name ?speciesName ;
+ gn:binomialName ?speciesBinomialName .
+ } .
+ FILTER( lcase(?symbol) = '$symbol' )
+} GROUP BY ?createdOn ?gnEntry
+ ?generif ?symbol
+ ?speciesBinomialName
+ORDER BY ASC(?createdOn)""").substitute(substitutions)
+    endpoint = current_app.config.get("SPARQL_ENDPOINT")
+    try:
+        # A fresh connection object is used per query, as before.
+        gn_entries = sparql_query(
+            sparql_conn=SPARQLWrapper(endpoint), query=gn_query)
+        ncbi_entries = sparql_query(
+            sparql_conn=SPARQLWrapper(endpoint), query=ncbi_query)
+        return jsonify({
+            "gn_entries": [entry.data for entry in gn_entries],
+            "ncbi_entries": [entry.data for entry in ncbi_entries],
+        })
+    # The SPARQL endpoint could not be reached
+    except (RemoteDisconnected, URLError):
+        # NOTE(review): the success path returns lists while this
+        # fallback returns empty objects; kept as-is so existing
+        # clients see the same payload -- confirm before changing.
+        return jsonify({
+            "gn_entries": {},
+            "ncbi_entries": {},
+        })
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
index deecefa..3e8d513 100644
--- a/gn3/db/rdf.py
+++ b/gn3/db/rdf.py
@@ -10,6 +10,21 @@ from pymonad.maybe import Just
from gn3.monads import MonadicDict
+# SPARQL PREFIX declarations, one per line and followed by a blank
+# line, meant to be placed at the top of a query so the short
+# namespace names (gn:, dct:, foaf:, pubmed:, ...) resolve.
+RDF_PREFIXES = "\n".join((
+    "PREFIX dct: <http://purl.org/dc/terms/>",
+    "PREFIX foaf: <http://xmlns.com/foaf/0.1/>",
+    "PREFIX generif: <http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>",
+    "PREFIX gn: <http://genenetwork.org/>",
+    "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
+    "PREFIX pubmed: <http://rdf.ncbi.nlm.nih.gov/pubmed/>",
+    "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
+    "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
+    "PREFIX taxon: <http://purl.uniprot.org/taxonomy/>",
+    "PREFIX up: <http://purl.uniprot.org/core/>",
+    "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
+    # Two empty entries reproduce the trailing newline and blank line.
+    "",
+    "",
+))
+
+
def sparql_query(
sparql_conn: SPARQLWrapper, query: str
) -> Tuple[MonadicDict, ...]: