diff options
| author | Munyoki Kilyungi | 2024-10-08 13:22:33 +0300 |
|---|---|---|
| committer | BonfaceKilz | 2024-10-14 23:50:49 +0300 |
| commit | 123624dd7a4e8c4b53dfee09fb5c11b4dfb49119 (patch) | |
| tree | 1429802a7afd36c370242a22278de5f9375e556c /gn3/db/rdf/wiki.py | |
| parent | 7e72f187839e666b2313313a55757592f0ceb803 (diff) | |
| download | genenetwork3-123624dd7a4e8c4b53dfee09fb5c11b4dfb49119.tar.gz | |
Add function for fetching NCBI data.
* gn3/db/rdf/wiki.py: Import datetime.
[RIF_CONTEXT]: New global variable defining the json-ld context for NCBI rif data.
(get_rif_entries_by_symbol): New function.
* tests/unit/db/rdf/data.py: New file containing expected NCBI test data.
* tests/unit/db/rdf/test_wiki.py (test_update_wiki_comment): New test case.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'gn3/db/rdf/wiki.py')
| -rw-r--r-- | gn3/db/rdf/wiki.py | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/gn3/db/rdf/wiki.py b/gn3/db/rdf/wiki.py index b2b301a..5e8e02e 100644 --- a/gn3/db/rdf/wiki.py +++ b/gn3/db/rdf/wiki.py @@ -9,6 +9,7 @@ NOTE: In the CONSTRUCT queries below, we manually sort the arrays from <https://stackoverflow.com/questions/78186393> <https://www.w3.org/TR/rdf-sparql-query/#modOrderBy> """ +from datetime import datetime from string import Template from gn3.db.rdf import ( BASE_CONTEXT, @@ -35,6 +36,20 @@ WIKI_CONTEXT = BASE_CONTEXT | { "id": "dct:identifier", } +RIF_CONTEXT = BASE_CONTEXT | { + "dct": "http://purl.org/dc/terms/", + "skos": "http://www.w3.org/2004/02/skos/core#", + "symbol": "gnt:symbol", + "species": "gnt:species", + "taxonomic_id": "skos:notation", + "gene_id": "gnt:hasGeneId", + "pubmed_id": "dct:references", + "created": "dct:created", + "comment": "rdfs:comment", + "version": "dct:hasVersion", + "id": "dct:identifier", +} + def __sanitize_result(result: dict) -> dict: """Make sure `categories` and `pubmed_ids` are always arrays""" @@ -244,3 +259,46 @@ $comment_triple} sparql_password=sparql_password, sparql_auth_uri=sparql_auth_uri, ) + + +def get_rif_entries_by_symbol( + symbol: str, sparql_uri: str, graph: str = "<http://genenetwork.org>" +) -> dict: + """Fetch NCBI RIF entries by a symbol. Symbol here is case in-sensitive.""" + query = Template(""" +$prefix + +CONSTRUCT { + ?comment gnt:symbol ?symbol ; + gnt:species ?species ; + dct:references ?pmid ; + rdfs:comment ?text ; + dct:hasVersion ?version ; + dct:created ?created ; + gnt:hasGeneId ?gene_id ; + skos:notation ?taxonId . +} FROM $graph WHERE { + ?comment rdfs:label ?text_ ; + gnt:symbol ?symbol ; + rdf:type gnc:NCBIWikiEntry ; + gnt:hasGeneId ?gene_id_ ; + dct:hasVersion ?version ; + dct:references ?pmid_ ; + dct:created ?createTime ; + gnt:belongsToSpecies ?speciesId . + ?speciesId gnt:shortName ?species . + FILTER ( LCASE(STR(?symbol)) = LCASE("$symbol") ) . + OPTIONAL { ?comment skos:notation ?taxonId_ . } . + BIND (STR(?text_) AS ?text) . 
+ BIND (xsd:integer(STRAFTER(STR(?taxonId_), STR(taxon:))) AS ?taxonId) . + BIND (xsd:integer(STRAFTER(STR(?pmid_), STR(pubmed:))) AS ?pmid) . + BIND (xsd:integer(STRAFTER(STR(?gene_id_), STR(generif:))) AS ?gene_id) . + BIND (STR(?createTime) AS ?created) . +} +""").substitute(prefix=RDF_PREFIXES, graph=graph, symbol=symbol) + results = query_frame_and_compact(query, RIF_CONTEXT, sparql_uri) + results["data"] = sorted( + results["data"], + key=lambda k: (k["species"], + datetime.strptime(k["created"], "%Y-%m-%d %H:%M:%S"))) + return results |
