diff options
| -rw-r--r-- | gn3/db/rdf/wiki.py | 58 | ||||
| -rw-r--r-- | tests/unit/db/rdf/data.py | 221 | ||||
| -rw-r--r-- | tests/unit/db/rdf/test_wiki.py | 14 |
3 files changed, 293 insertions, 0 deletions
diff --git a/gn3/db/rdf/wiki.py b/gn3/db/rdf/wiki.py index b2b301a..5e8e02e 100644 --- a/gn3/db/rdf/wiki.py +++ b/gn3/db/rdf/wiki.py @@ -9,6 +9,7 @@ NOTE: In the CONSTRUCT queries below, we manually sort the arrays from <https://stackoverflow.com/questions/78186393> <https://www.w3.org/TR/rdf-sparql-query/#modOrderBy> """ +from datetime import datetime from string import Template from gn3.db.rdf import ( BASE_CONTEXT, @@ -35,6 +36,20 @@ WIKI_CONTEXT = BASE_CONTEXT | { "id": "dct:identifier", } +RIF_CONTEXT = BASE_CONTEXT | { + "dct": "http://purl.org/dc/terms/", + "skos": "http://www.w3.org/2004/02/skos/core#", + "symbol": "gnt:symbol", + "species": "gnt:species", + "taxonomic_id": "skos:notation", + "gene_id": "gnt:hasGeneId", + "pubmed_id": "dct:references", + "created": "dct:created", + "comment": "rdfs:comment", + "version": "dct:hasVersion", + "id": "dct:identifier", +} + def __sanitize_result(result: dict) -> dict: """Make sure `categories` and `pubmed_ids` are always arrays""" @@ -244,3 +259,46 @@ $comment_triple} sparql_password=sparql_password, sparql_auth_uri=sparql_auth_uri, ) + + +def get_rif_entries_by_symbol( + symbol: str, sparql_uri: str, graph: str = "<http://genenetwork.org>" +) -> dict: + """Fetch NCBI RIF entries by a symbol. Symbol here is case-insensitive.""" + query = Template(""" +$prefix + +CONSTRUCT { + ?comment gnt:symbol ?symbol ; + gnt:species ?species ; + dct:references ?pmid ; + rdfs:comment ?text ; + dct:hasVersion ?version ; + dct:created ?created ; + gnt:hasGeneId ?gene_id ; + skos:notation ?taxonId . +} FROM $graph WHERE { + ?comment rdfs:label ?text_ ; + gnt:symbol ?symbol ; + rdf:type gnc:NCBIWikiEntry ; + gnt:hasGeneId ?gene_id_ ; + dct:hasVersion ?version ; + dct:references ?pmid_ ; + dct:created ?createTime ; + gnt:belongsToSpecies ?speciesId . + ?speciesId gnt:shortName ?species . + FILTER ( LCASE(STR(?symbol)) = LCASE("$symbol") ) . + OPTIONAL { ?comment skos:notation ?taxonId_ . } . + BIND (STR(?text_) AS ?text) . 
+ BIND (xsd:integer(STRAFTER(STR(?taxonId_), STR(taxon:))) AS ?taxonId) . + BIND (xsd:integer(STRAFTER(STR(?pmid_), STR(pubmed:))) AS ?pmid) . + BIND (xsd:integer(STRAFTER(STR(?gene_id_), STR(generif:))) AS ?gene_id) . + BIND (STR(?createTime) AS ?created) . +} +""").substitute(prefix=RDF_PREFIXES, graph=graph, symbol=symbol) + results = query_frame_and_compact(query, RIF_CONTEXT, sparql_uri) + results["data"] = sorted( + results["data"], + key=lambda k: (k["species"], + datetime.strptime(k["created"], "%Y-%m-%d %H:%M:%S"))) + return results diff --git a/tests/unit/db/rdf/data.py b/tests/unit/db/rdf/data.py new file mode 100644 index 0000000..b4d0590 --- /dev/null +++ b/tests/unit/db/rdf/data.py @@ -0,0 +1,221 @@ +"""Some test data to be used in RDF tests.""" + +LPL_RIF_ENTRIES = { + "@context": { + "data": "@graph", + "type": "@type", + "gn": "http://genenetwork.org/id/", + "gnc": "http://genenetwork.org/category/", + "gnt": "http://genenetwork.org/term/", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#>", + "dct": "http://purl.org/dc/terms/", + "skos": "http://www.w3.org/2004/02/skos/core#", + "symbol": "gnt:symbol", + "species": "gnt:species", + "taxonomic_id": "skos:notation", + "gene_id": "gnt:hasGeneId", + "pubmed_id": "dct:references", + "created": "dct:created", + "comment": "rdfs:comment", + "version": "dct:hasVersion", + "id": "dct:identifier", + }, + "data": [ + { + "@id": "gn:rif-4023-36763533-2023-02-23T20:40:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-02-23 20:40:00", + "pubmed_id": 36763533, + "comment": "Angiopoietin-like protein 4/8 complex-mediated plasmin generation \ +leads to cleavage of the complex and restoration of LPL activity.", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-36652113-2023-04-07T20:39:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-04-07 20:39:00", 
"pubmed_id": 36652113, + "comment": "The breast cancer microenvironment and lipoprotein lipase: \ +Another negative notch for a beneficial enzyme?", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-36519761-2023-04-27T20:33:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-04-27 20:33:00", + "pubmed_id": 36519761, + "comment": "Parkin regulates neuronal lipid homeostasis through \ +SREBP2-lipoprotein lipase pathway-implications for Parkinson's disease.", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-36708756-2023-05-22T20:32:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-05-22 20:32:00", + "pubmed_id": 36708756, + "comment": "Plasma Lipoprotein Lipase Is Associated with Risk of \ +Future Major Adverse Cardiovascular Events in Patients Following Carotid Endarterectomy.", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-37155355-2023-07-04T21:12:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-07-04 21:12:00", + "pubmed_id": 37155355, + "comment": "Inverse association between apolipoprotein C-II and \ +cardiovascular mortality: role of lipoprotein lipase activity modulation.", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-37432202-2023-07-13T20:35:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-07-13 20:35:00", + "pubmed_id": 37432202, + "comment": "Effect of the Interaction between Seaweed Intake and LPL \ +Polymorphisms on Metabolic Syndrome in Middle-Aged Korean Adults.", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-37568214-2023-08-14T20:37:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-08-14 20:37:00", + "pubmed_id": 37568214, + "comment": "Frameshift coding sequence variants in the LPL gene: identification \ +of two novel events and exploration of the 
genotype-phenotype relationship for \ +variants reported to date.", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-37550668-2023-08-22T20:29:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-08-22 20:29:00", + "pubmed_id": 37550668, + "comment": "The East Asian-specific LPL p.Ala288Thr (c.862G > A) missense \ +variant exerts a mild effect on protein function.", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-37128695-2023-09-12T20:35:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-09-12 20:35:00", + "pubmed_id": 37128695, + "comment": "Interaction between APOE, APOA1, and LPL Gene Polymorphisms \ +and Variability in Changes in Lipid and Blood Pressure following Orange Juice Intake: \ +A Pilot Study.", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-37427758-2023-09-25T09:33:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-09-25 09:33:00", + "pubmed_id": 37427758, + "comment": "Variants within the LPL gene confer susceptility to \ +diabetic kidney disease and rapid decline in kidney function in Chinese patients \ +with type 2 diabetes.", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-37901192-2023-11-01T08:55:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-11-01 08:55:00", + "pubmed_id": 37901192, + "comment": "The Association of Adipokines and Myokines in the \ +Blood of Obese Children and Adolescents with Lipoprotein Lipase rs328 Gene Variants.", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-37871217-2023-11-10T08:44:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-11-10 08:44:00", + "pubmed_id": 37871217, + "comment": "The lipoprotein lipase that is shuttled into \ +capillaries by GPIHBP1 enters the glycocalyx where it mediates lipoprotein processing.", + 
"taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-37858495-2023-12-28T20:33:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-12-28 20:33:00", + "pubmed_id": 37858495, + "comment": "Clinical profile, genetic spectrum and therapy \ +evaluation of 19 Chinese pediatric patients with lipoprotein lipase deficiency.", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-4023-38114521-2023-12-29T20:33:00-5", + "gene_id": 4023, + "version": 5, + "species": "human", + "symbol": "LPL", + "created": "2023-12-29 20:33:00", + "pubmed_id": 38114521, + "comment": "Developing a model to predict the early risk of \ +hypertriglyceridemia based on inhibiting lipoprotein lipase (LPL): a translational study.", + "taxonomic_id": 9606, + }, + { + "@id": "gn:rif-16956-36519761-2023-04-27T20:33:00-5", + "gene_id": 16956, + "version": 5, + "species": "mouse", + "symbol": "Lpl", + "created": "2023-04-27 20:33:00", + "pubmed_id": 36519761, + "comment": "Parkin regulates neuronal lipid homeostasis through \ +SREBP2-lipoprotein lipase pathway-implications for Parkinson's disease.", + "taxonomic_id": 10090, + }, + { + "@id": "gn:rif-24539-38114521-2023-12-29T20:33:00-5", + "gene_id": 24539, + "version": 5, + "species": "rat", + "symbol": "Lpl", + "created": "2023-12-29 20:33:00", + "pubmed_id": 38114521, + "comment": "Developing a model to predict the early risk of \ +hypertriglyceridemia based on inhibiting lipoprotein lipase (LPL): a translational study.", + "taxonomic_id": 10116, + }, + ], +} diff --git a/tests/unit/db/rdf/test_wiki.py b/tests/unit/db/rdf/test_wiki.py index 3abf3ad..7a0dc3a 100644 --- a/tests/unit/db/rdf/test_wiki.py +++ b/tests/unit/db/rdf/test_wiki.py @@ -22,11 +22,14 @@ from tests.fixtures.rdf import ( SPARQL_CONF, ) +from tests.unit.db.rdf.data import LPL_RIF_ENTRIES + from gn3.db.rdf.wiki import ( __sanitize_result, get_wiki_entries_by_symbol, get_comment_history, update_wiki_comment, + get_rif_entries_by_symbol, ) 
GRAPH = "<http://cd-test.genenetwork.org>" @@ -396,3 +399,14 @@ def test_update_wiki_comment(rdf_setup): # pylint: disable=W0613,W0621 "version": 3, "web_url": "http://some-website.com", }) + + +@pytest.mark.rdf +def test_get_rif_entries_by_symbol(rdf_setup): # pylint: disable=W0613,W0621 + """Test fetching NCBI Rif Metadata from RDF""" + sparql_conf = SPARQL_CONF + assert get_rif_entries_by_symbol( + symbol="Lpl", + sparql_uri=sparql_conf["sparql_endpoint"], + graph=GRAPH, + ) == LPL_RIF_ENTRIES |
