about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2023-04-14 15:37:31 +0300
committer: BonfaceKilz, 2023-04-17 15:24:36 +0300
commit: d04779286b49346946120a5801509ba80ee987ba (patch)
tree: e3ff261fbb2b3b906a489a6d571c4d56fc19c724
parent: 8af8105444522c2c71b5ddd36a550e964cddffbf (diff)
download: genenetwork3-d04779286b49346946120a5801509ba80ee987ba.tar.gz
Create new endpoint for fetching GeneRIF entries
* gn3/api/metadata.py: Import Template, sparql_query and RDF_PREFIXES.
(get_genewiki_entries): New endpoint.
* gn3/db/rdf.py: Add new constant for storing rdf prefixes.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rw-r--r-- gn3/api/metadata.py 74
-rw-r--r-- gn3/db/rdf.py 15
2 files changed, 89 insertions, 0 deletions
diff --git a/gn3/api/metadata.py b/gn3/api/metadata.py
index f87743b..d542dc6 100644
--- a/gn3/api/metadata.py
+++ b/gn3/api/metadata.py
@@ -1,4 +1,5 @@
 """API for fetching metadata using an API"""
+from string import Template
 from http.client import RemoteDisconnected
 from urllib.error import URLError
 from flask import Blueprint
@@ -9,6 +10,8 @@ from SPARQLWrapper import SPARQLWrapper
 
 from gn3.db.rdf import get_dataset_metadata
 from gn3.db.rdf import get_trait_metadata
+from gn3.db.rdf import sparql_query
+from gn3.db.rdf import RDF_PREFIXES
 
 
 metadata = Blueprint("metadata", __name__)
@@ -42,3 +45,74 @@ def trait_metadata(dataset_name, trait_name):
     # The virtuoso server is misconfigured or it isn't running at all
     except (RemoteDisconnected, URLError):
         return jsonify({})
+
+
+@metadata.route("/genewiki/<symbol>", methods=["GET"])
+def get_genewiki_entries(symbol):
+    """Fetch the GN and NCBI GeneRIF entries for the gene SYMBOL.
+
+    Both JSON lists are empty when the SPARQL endpoint is unreachable.
+    """
+    # SYMBOL is raw URL input: escape it so that it cannot terminate the
+    # single-quoted SPARQL literal it is substituted into.
+    safe_symbol = str(symbol).lower().translate(str.maketrans(
+        {"\\": "\\\\", "'": "\\'", "\n": "\\n", "\r": "\\r"}))
+    try:
+        gn_entries = sparql_query(
+            sparql_conn=SPARQLWrapper(current_app.config.get("SPARQL_ENDPOINT")),
+            query=Template("""
+$rdf_prefixes
+
+SELECT ?author ?geneCategory (STR(?gnEntry) AS ?entry)
+       (STR(?createdOn) AS ?created)
+       (GROUP_CONCAT(DISTINCT ?pmid; SEPARATOR=',') AS ?PubMedId)
+       ?weburl
+WHERE {
+  ?generif gn:symbol ?symbol .
+  ?generif gn:geneWikiEntryOfGn _:gnEntry .
+  _:gnEntry gn:geneWikiEntry ?gnEntry;
+            dct:creator ?author;
+            dct:created ?createdOn .
+  OPTIONAL { _:gnEntry gn:geneCategory ?geneCategory } .
+  OPTIONAL { _:gnEntry foaf:homepage ?weburl } .
+  OPTIONAL { _:gnEntry dct:source ?pmid} .
+  OPTIONAL {
+    ?generif gn:wikiEntryOfSpecies ?speciesName .
+    ?species gn:name ?speciesName ;
+             gn:binomialName ?speciesBinomialName .
+  } .
+  FILTER( lcase(?symbol) = '$symbol' )
+} GROUP BY ?author ?createdOn ?gnEntry
+           ?generif ?symbol ?weburl
+	   ?geneCategory
+ORDER BY ASC(?createdOn)""").substitute(rdf_prefixes=RDF_PREFIXES, symbol=safe_symbol))
+        ncbi_entries = sparql_query(
+            sparql_conn=SPARQLWrapper(current_app.config.get("SPARQL_ENDPOINT")),
+            query=Template("""
+$rdf_prefixes
+
+SELECT ?speciesBinomialName (STR(?gnEntry) AS ?entry)
+       (STR(?createdOn) AS ?createdOn)
+       (GROUP_CONCAT(DISTINCT REPLACE(STR(?pmid), pubmed:, ''); SEPARATOR=',') AS ?PubMedId)
+       ?generif
+WHERE {
+  ?generif gn:symbol ?symbol .
+  ?generif gn:geneWikiEntryOfNCBI [
+    gn:geneWikiEntry ?gnEntry ;
+    dct:created ?createdOn ;
+    dct:source ?pmid
+  ] .
+  OPTIONAL {
+    ?generif gn:wikiEntryOfSpecies ?speciesName .
+    ?species gn:name ?speciesName ;
+             gn:binomialName ?speciesBinomialName .
+  } .
+  FILTER( lcase(?symbol) = '$symbol' )
+} GROUP BY ?createdOn ?gnEntry
+           ?generif ?symbol
+	   ?speciesBinomialName
+ORDER BY ASC(?createdOn)""").substitute(rdf_prefixes=RDF_PREFIXES, symbol=safe_symbol))
+        return jsonify({"gn_entries": [entry.data for entry in gn_entries],
+                        "ncbi_entries": [entry.data for entry in ncbi_entries]})
+    except (RemoteDisconnected, URLError):
+        return jsonify({"gn_entries": [], "ncbi_entries": []})
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
index deecefa..3e8d513 100644
--- a/gn3/db/rdf.py
+++ b/gn3/db/rdf.py
@@ -10,6 +10,21 @@ from pymonad.maybe import Just
 from gn3.monads import MonadicDict
 
 
+# PREFIX declarations to prepend to SPARQL queries; ends with a blank line.
+RDF_PREFIXES = "".join(f"PREFIX {name}: <{iri}>\n" for name, iri in (
+    ("dct", "http://purl.org/dc/terms/"),
+    ("foaf", "http://xmlns.com/foaf/0.1/"),
+    ("generif", "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids="),
+    ("gn", "http://genenetwork.org/"),
+    ("owl", "http://www.w3.org/2002/07/owl#"),
+    ("pubmed", "http://rdf.ncbi.nlm.nih.gov/pubmed/"),
+    ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
+    ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
+    ("taxon", "http://purl.uniprot.org/taxonomy/"),
+    ("up", "http://purl.uniprot.org/core/"),
+    ("xsd", "http://www.w3.org/2001/XMLSchema#"))) + "\n"
+
+
 def sparql_query(
         sparql_conn: SPARQLWrapper, query: str
 ) -> Tuple[MonadicDict, ...]: