about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--  gn3/db/rdf/wiki.py  58
-rw-r--r--  tests/unit/db/rdf/data.py  221
-rw-r--r--  tests/unit/db/rdf/test_wiki.py  14
3 files changed, 293 insertions, 0 deletions
diff --git a/gn3/db/rdf/wiki.py b/gn3/db/rdf/wiki.py
index b2b301a..5e8e02e 100644
--- a/gn3/db/rdf/wiki.py
+++ b/gn3/db/rdf/wiki.py
@@ -9,6 +9,7 @@ NOTE: In the CONSTRUCT queries below, we manually sort the arrays from
    <https://stackoverflow.com/questions/78186393>
    <https://www.w3.org/TR/rdf-sparql-query/#modOrderBy>
 """
+from datetime import datetime
 from string import Template
 from gn3.db.rdf import (
     BASE_CONTEXT,
@@ -35,6 +36,20 @@ WIKI_CONTEXT = BASE_CONTEXT | {
     "id": "dct:identifier",
 }
 
+RIF_CONTEXT = BASE_CONTEXT | {  # context for get_rif_entries_by_symbol
+    "dct": "http://purl.org/dc/terms/",  # prefix used by the dct: terms below
+    "skos": "http://www.w3.org/2004/02/skos/core#",  # for skos:notation
+    "symbol": "gnt:symbol",
+    "species": "gnt:species",
+    "taxonomic_id": "skos:notation",
+    "gene_id": "gnt:hasGeneId",
+    "pubmed_id": "dct:references",
+    "created": "dct:created",
+    "comment": "rdfs:comment",
+    "version": "dct:hasVersion",
+    "id": "dct:identifier",  # NOTE(review): not emitted by the RIF query?
+}
+
 
 def __sanitize_result(result: dict) -> dict:
     """Make sure `categories` and `pubmed_ids` are always arrays"""
@@ -244,3 +259,46 @@ $comment_triple}
         sparql_password=sparql_password,
         sparql_auth_uri=sparql_auth_uri,
     )
+
+
+def get_rif_entries_by_symbol(
+        symbol: str, sparql_uri: str, graph: str = "<http://genenetwork.org>"
+) -> dict:
+    """Fetch NCBI RIF entries for a gene symbol.
+
+    The symbol is matched case-insensitively, so "Lpl" also matches "LPL".
+
+    Args:
+        symbol: Gene symbol to look up.  It is escaped before being placed
+            in the query, so quotes or backslashes cannot alter the SPARQL.
+        sparql_uri: SPARQL endpoint handed to `query_frame_and_compact`.
+        graph: Named graph to query, written as a bracketed IRI.
+
+    Returns:
+        Whatever `query_frame_and_compact` returns for the query.  When that
+        result carries a "data" list, the list is sorted by species and then
+        by creation time.
+
+    Raises:
+        KeyError: If an entry in "data" lacks "species" or "created".
+        ValueError: If an entry's "created" value is not formatted as
+            "%Y-%m-%d %H:%M:%S".
+    """
+    # Escape the characters that would otherwise terminate or corrupt the
+    # double-quoted SPARQL string literal used in the FILTER below.
+    escaped_symbol = symbol.translate(str.maketrans(
+        {"\\": "\\\\", '"': '\\"', "\n": "\\n", "\r": "\\r"}))
+    query = Template("""
+$prefix
+
+CONSTRUCT {
+    ?comment gnt:symbol ?symbol ;
+             gnt:species ?species ;
+             dct:references ?pmid ;
+             rdfs:comment ?text ;
+             dct:hasVersion ?version ;
+             dct:created ?created ;
+             gnt:hasGeneId ?gene_id ;
+             skos:notation ?taxonId .
+} FROM $graph WHERE {
+    ?comment rdfs:label ?text_ ;
+             gnt:symbol ?symbol ;
+             rdf:type gnc:NCBIWikiEntry ;
+             gnt:hasGeneId ?gene_id_ ;
+             dct:hasVersion ?version ;
+             dct:references ?pmid_ ;
+             dct:created ?createTime ;
+             gnt:belongsToSpecies ?speciesId .
+    ?speciesId gnt:shortName ?species .
+    FILTER ( LCASE(STR(?symbol)) = LCASE("$symbol") ) .
+    OPTIONAL { ?comment skos:notation ?taxonId_ . } .
+    BIND (STR(?text_) AS ?text) .
+    BIND (xsd:integer(STRAFTER(STR(?taxonId_), STR(taxon:))) AS ?taxonId) .
+    BIND (xsd:integer(STRAFTER(STR(?pmid_), STR(pubmed:))) AS ?pmid) .
+    BIND (xsd:integer(STRAFTER(STR(?gene_id_), STR(generif:))) AS ?gene_id) .
+    BIND (STR(?createTime) AS ?created) .
+}
+""").substitute(prefix=RDF_PREFIXES, graph=graph, symbol=escaped_symbol)
+    results = query_frame_and_compact(query, RIF_CONTEXT, sparql_uri)
+    entries = results.get("data")
+    # NOTE(review): `query_frame_and_compact` is defined elsewhere; this guard
+    # assumes it may return a dict without a "data" list (e.g. when nothing
+    # matches), in which case the result is passed through unsorted.
+    if isinstance(entries, list):
+        results["data"] = sorted(
+            entries,
+            key=lambda entry: (
+                entry["species"],
+                datetime.strptime(entry["created"], "%Y-%m-%d %H:%M:%S"),
+            ),
+        )
+    return results
diff --git a/tests/unit/db/rdf/data.py b/tests/unit/db/rdf/data.py
new file mode 100644
index 0000000..b4d0590
--- /dev/null
+++ b/tests/unit/db/rdf/data.py
@@ -0,0 +1,221 @@
+"""Some test data to be used in the RDF unit tests."""
+
+LPL_RIF_ENTRIES = {  # expected get_rif_entries_by_symbol("Lpl") result
+    "@context": {
+        "data": "@graph",
+        "type": "@type",
+        "gn": "http://genenetwork.org/id/",
+        "gnc": "http://genenetwork.org/category/",
+        "gnt": "http://genenetwork.org/term/",
+        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+        "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#>",  # NOTE(review): stray ">"? confirm
+        "dct": "http://purl.org/dc/terms/",
+        "skos": "http://www.w3.org/2004/02/skos/core#",
+        "symbol": "gnt:symbol",
+        "species": "gnt:species",
+        "taxonomic_id": "skos:notation",
+        "gene_id": "gnt:hasGeneId",
+        "pubmed_id": "dct:references",
+        "created": "dct:created",
+        "comment": "rdfs:comment",
+        "version": "dct:hasVersion",
+        "id": "dct:identifier",
+    },
+    "data": [  # ordered by species, then by "created" timestamp
+        {
+            "@id": "gn:rif-4023-36763533-2023-02-23T20:40:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-02-23 20:40:00",
+            "pubmed_id": 36763533,
+            "comment": "Angiopoietin-like protein 4/8 complex-mediated plasmin generation \
+leads to cleavage of the complex and restoration of LPL activity.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-36652113-2023-04-07T20:39:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-04-07 20:39:00",
+            "pubmed_id": 36652113,
+            "comment": "The breast cancer microenvironment and lipoprotein lipase: \
+Another negative notch for a beneficial enzyme?",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-36519761-2023-04-27T20:33:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-04-27 20:33:00",
+            "pubmed_id": 36519761,
+            "comment": "Parkin regulates neuronal lipid homeostasis through \
+SREBP2-lipoprotein lipase pathway-implications for Parkinson's disease.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-36708756-2023-05-22T20:32:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-05-22 20:32:00",
+            "pubmed_id": 36708756,
+            "comment": "Plasma Lipoprotein Lipase Is Associated with Risk of \
+Future Major Adverse Cardiovascular Events in Patients Following Carotid Endarterectomy.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-37155355-2023-07-04T21:12:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-07-04 21:12:00",
+            "pubmed_id": 37155355,
+            "comment": "Inverse association between apolipoprotein C-II and \
+cardiovascular mortality: role of lipoprotein lipase activity modulation.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-37432202-2023-07-13T20:35:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-07-13 20:35:00",
+            "pubmed_id": 37432202,
+            "comment": "Effect of the Interaction between Seaweed Intake and LPL \
+Polymorphisms on Metabolic Syndrome in Middle-Aged Korean Adults.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-37568214-2023-08-14T20:37:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-08-14 20:37:00",
+            "pubmed_id": 37568214,
+            "comment": "Frameshift coding sequence variants in the LPL gene: identification \
+of two novel events and exploration of the genotype-phenotype relationship for \
+variants reported to date.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-37550668-2023-08-22T20:29:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-08-22 20:29:00",
+            "pubmed_id": 37550668,
+            "comment": "The East Asian-specific LPL p.Ala288Thr (c.862G > A) missense \
+variant exerts a mild effect on protein function.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-37128695-2023-09-12T20:35:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-09-12 20:35:00",
+            "pubmed_id": 37128695,
+            "comment": "Interaction between APOE, APOA1, and LPL Gene Polymorphisms \
+and Variability in Changes in Lipid and Blood Pressure following Orange Juice Intake: \
+A Pilot Study.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-37427758-2023-09-25T09:33:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-09-25 09:33:00",
+            "pubmed_id": 37427758,
+            "comment": "Variants within the LPL gene confer susceptility to \
+diabetic kidney disease and rapid decline in kidney function in Chinese patients \
+with type 2 diabetes.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-37901192-2023-11-01T08:55:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-11-01 08:55:00",
+            "pubmed_id": 37901192,
+            "comment": "The Association of Adipokines and Myokines in the \
+Blood of Obese Children and Adolescents with Lipoprotein Lipase rs328 Gene Variants.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-37871217-2023-11-10T08:44:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-11-10 08:44:00",
+            "pubmed_id": 37871217,
+            "comment": "The lipoprotein lipase that is shuttled into \
+capillaries by GPIHBP1 enters the glycocalyx where it mediates lipoprotein processing.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-37858495-2023-12-28T20:33:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-12-28 20:33:00",
+            "pubmed_id": 37858495,
+            "comment": "Clinical profile, genetic spectrum and therapy \
+evaluation of 19 Chinese pediatric patients with lipoprotein lipase deficiency.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-4023-38114521-2023-12-29T20:33:00-5",
+            "gene_id": 4023,
+            "version": 5,
+            "species": "human",
+            "symbol": "LPL",
+            "created": "2023-12-29 20:33:00",
+            "pubmed_id": 38114521,
+            "comment": "Developing a model to predict the early risk of \
+hypertriglyceridemia based on inhibiting lipoprotein lipase (LPL): a translational study.",
+            "taxonomic_id": 9606,
+        },
+        {
+            "@id": "gn:rif-16956-36519761-2023-04-27T20:33:00-5",
+            "gene_id": 16956,
+            "version": 5,
+            "species": "mouse",
+            "symbol": "Lpl",
+            "created": "2023-04-27 20:33:00",
+            "pubmed_id": 36519761,
+            "comment": "Parkin regulates neuronal lipid homeostasis through \
+SREBP2-lipoprotein lipase pathway-implications for Parkinson's disease.",
+            "taxonomic_id": 10090,
+        },
+        {
+            "@id": "gn:rif-24539-38114521-2023-12-29T20:33:00-5",
+            "gene_id": 24539,
+            "version": 5,
+            "species": "rat",
+            "symbol": "Lpl",
+            "created": "2023-12-29 20:33:00",
+            "pubmed_id": 38114521,
+            "comment": "Developing a model to predict the early risk of \
+hypertriglyceridemia based on inhibiting lipoprotein lipase (LPL): a translational study.",
+            "taxonomic_id": 10116,
+        },
+    ],
+}
diff --git a/tests/unit/db/rdf/test_wiki.py b/tests/unit/db/rdf/test_wiki.py
index 3abf3ad..7a0dc3a 100644
--- a/tests/unit/db/rdf/test_wiki.py
+++ b/tests/unit/db/rdf/test_wiki.py
@@ -22,11 +22,14 @@ from tests.fixtures.rdf import (
     SPARQL_CONF,
 )
 
+from tests.unit.db.rdf.data import LPL_RIF_ENTRIES
+
 from gn3.db.rdf.wiki import (
     __sanitize_result,
     get_wiki_entries_by_symbol,
     get_comment_history,
     update_wiki_comment,
+    get_rif_entries_by_symbol,
 )
 
 GRAPH = "<http://cd-test.genenetwork.org>"
@@ -396,3 +399,14 @@ def test_update_wiki_comment(rdf_setup):  # pylint: disable=W0613,W0621
         "version": 3,
         "web_url": "http://some-website.com",
     })
+
+
+@pytest.mark.rdf
+def test_get_rif_entries_by_symbol(rdf_setup):  # pylint: disable=W0613,W0621
+    """Check that RIF entries fetched for "Lpl" equal the stored fixture."""
+    endpoint = SPARQL_CONF["sparql_endpoint"]
+    fetched = get_rif_entries_by_symbol(
+        symbol="Lpl", sparql_uri=endpoint, graph=GRAPH
+    )
+    # The fixture covers the "@context" as well as the ordered entries.
+    assert fetched == LPL_RIF_ENTRIES