diff options
Diffstat (limited to 'gn3/db/rdf/wiki.py')
| -rw-r--r-- | gn3/db/rdf/wiki.py | 158 |
1 files changed, 135 insertions, 23 deletions
diff --git a/gn3/db/rdf/wiki.py b/gn3/db/rdf/wiki.py index b2b301a..dd8d204 100644 --- a/gn3/db/rdf/wiki.py +++ b/gn3/db/rdf/wiki.py @@ -15,6 +15,7 @@ from gn3.db.rdf import ( RDF_PREFIXES, query_frame_and_compact, update_rdf, + sparql_query, ) @@ -41,6 +42,10 @@ def __sanitize_result(result: dict) -> dict: if not result: return {} categories = result.get("categories") + if (version := result.get("version")) and isinstance(version, str): + result["version"] = int(version) + if (wiki_id := result.get("id")) and isinstance(version, str): + result["id"] = int(wiki_id) if isinstance(categories, str): result["categories"] = [categories] if categories else [] result["categories"] = sorted(result["categories"]) @@ -79,7 +84,7 @@ CONSTRUCT { gnt:belongsToCategory ?category ; gnt:hasVersion ?max ; dct:created ?created ; - dct:identifier ?id_ . + dct:identifier ?id . } FROM $graph WHERE { ?comment rdfs:label ?text_ ; gnt:symbol ?symbol ; @@ -88,12 +93,12 @@ CONSTRUCT { dct:created ?createTime . FILTER ( LCASE(STR(?symbol)) = LCASE("$symbol") ) . { - SELECT (MAX(?vers) AS ?max) ?id_ WHERE { + SELECT (MAX(?vers) AS ?max_) ?id_ WHERE { ?comment dct:identifier ?id_ ; dct:hasVersion ?vers . } } - ?comment dct:hasVersion ?max . + ?comment dct:hasVersion ?max_ . OPTIONAL { ?comment gnt:reason ?reason_ } . OPTIONAL { ?comment gnt:belongsToSpecies ?speciesId . @@ -106,6 +111,8 @@ CONSTRUCT { OPTIONAL { ?comment gnt:belongsToCategory ?category_ } . BIND (str(?createTime) AS ?created) . BIND (str(?text_) AS ?text) . + BIND (str(?max_) AS ?max) . + BIND (str(?id_) AS ?id) . BIND (STR(COALESCE(?pmid_, "")) AS ?pmid) . BIND (COALESCE(?reason_, "") AS ?reason) . BIND (STR(COALESCE(?weburl_, "")) AS ?weburl) . @@ -154,7 +161,7 @@ CONSTRUCT { rdfs:label ?text_ ; gnt:symbol ?symbol ; dct:created ?createTime ; - dct:hasVersion ?version ; + dct:hasVersion ?version_ ; dct:identifier $comment_id . OPTIONAL { ?comment gnt:reason ?reason_ } . OPTIONAL { @@ -167,6 +174,7 @@ CONSTRUCT { OPTIONAL { ?comment foaf:mbox ?email_ . } . OPTIONAL { ?comment gnt:belongsToCategory ?category_ . } . BIND (str(?text_) AS ?text) . + BIND (str(?version_) AS ?version) . BIND (str(?createTime) AS ?created) . BIND (STR(COALESCE(?pmid_, "")) AS ?pmid) . BIND (COALESCE(?reason_, "") AS ?reason) . @@ -186,38 +194,42 @@ CONSTRUCT { def update_wiki_comment( - insert_dict: dict, - sparql_user: str, - sparql_password: str, - sparql_auth_uri: str, - graph: str = "<http://genenetwork.org>", + insert_dict: dict, + sparql_user: str, + sparql_password: str, + sparql_auth_uri: str, + graph: str = "<http://genenetwork.org>", ) -> str: """Update a wiki comment by inserting a comment with the same -identifier but an updated version id. + identifier but an updated version id. """ name = f"gn:wiki-{insert_dict['Id']}-{insert_dict['versionId']}" - comment_triple = Template("""$name rdfs:label '''$comment'''@en ; + comment_triple = Template( + """$name rdfs:label '''$comment'''@en ; rdf:type gnc:GNWikiEntry ; gnt:symbol "$symbol" ; dct:identifier "$comment_id"^^xsd:integer ; dct:hasVersion "$next_version"^^xsd:integer ; dct:created "$created"^^xsd:datetime . -""").substitute( +""" + ).substitute( comment=insert_dict["comment"], - name=name, symbol=insert_dict['symbol'], - comment_id=insert_dict["Id"], next_version=insert_dict["versionId"], - created=insert_dict["createtime"]) + name=name, + symbol=insert_dict["symbol"], + comment_id=insert_dict["Id"], + next_version=insert_dict["versionId"], + created=insert_dict["createtime"], + ) using = "" if insert_dict["email"]: comment_triple += f"{name} foaf:mbox <{insert_dict['email']}> .\n" if insert_dict["initial"]: comment_triple += f"{name} gnt:initial \"{insert_dict['initial']}\" .\n" - if insert_dict["species"]: + if insert_dict["species"] and insert_dict["species"].lower() != "no specific species": comment_triple += f"{name} gnt:belongsToSpecies ?speciesId .\n" using = Template( - """ USING $graph WHERE { ?speciesId gnt:shortName "$species" . } """).substitute( - graph=graph, species=insert_dict["species"] - ) + """ USING $graph WHERE { ?speciesId gnt:shortName "$species" . } """ + ).substitute(graph=graph, species=insert_dict["species"]) if insert_dict["reason"]: comment_triple += f"{name} gnt:reason \"{insert_dict['reason']}\" .\n" if insert_dict["weburl"]: @@ -236,10 +248,110 @@ INSERT { GRAPH $graph { $comment_triple} } $using -""").substitute(prefix=RDF_PREFIXES, - graph=graph, - comment_triple=comment_triple, - using=using), +""" + ).substitute( + prefix=RDF_PREFIXES, graph=graph, comment_triple=comment_triple, using=using + ), + sparql_user=sparql_user, + sparql_password=sparql_password, + sparql_auth_uri=sparql_auth_uri, + ) + + +def get_rif_entries_by_symbol( + symbol: str, sparql_uri: str, graph: str = "<http://genenetwork.org>" +) -> dict: + """Fetch NCBI RIF entries for a given symbol (case-insensitive). + +This function retrieves NCBI RIF entries using a SPARQL `SELECT` query +instead of a `CONSTRUCT` to avoid truncation. The Virtuoso SPARQL +engine limits query results to 1,048,576 triples per solution, and +NCBI entries can exceed this limit. Since there may be more than +2,000 entries, which could result in the number of triples surpassing +the limit, `SELECT` is used to ensure complete data retrieval without +truncation. See: + +<https://community.openlinksw.com/t/sparql-query-limiting-results-to-100000-triples/2131> + + """ + # XXX: Consider pagination + query = Template( + """ +$prefix + +SELECT ?comment ?symbol ?species ?pubmed_id ?version ?created ?gene_id ?taxonomic_id +FROM $graph WHERE { + ?comment_id rdfs:label ?text_ ; + gnt:symbol ?symbol ; + rdf:type gnc:NCBIWikiEntry ; + gnt:hasGeneId ?gene_id_ ; + dct:hasVersion ?version ; + dct:references ?pmid_ ; + dct:created ?createTime ; + gnt:belongsToSpecies ?speciesId . + ?speciesId rdfs:label ?species . + FILTER ( LCASE(?symbol) = LCASE("$symbol") ) . + OPTIONAL { ?comment_id skos:notation ?taxonId_ . } . + BIND (STR(?text_) AS ?comment) . + BIND (xsd:integer(STRAFTER(STR(?taxonId_), STR(taxon:))) AS ?taxonomic_id) . + BIND (xsd:integer(STRAFTER(STR(?pmid_), STR(pubmed:))) AS ?pubmed_id) . + BIND (xsd:integer(STRAFTER(STR(?gene_id_), STR(generif:))) AS ?gene_id) . + BIND (STR(?createTime) AS ?created) . +} ORDER BY ?species ?createTime +""" + ).substitute(prefix=RDF_PREFIXES, graph=graph, symbol=symbol) + results: dict[str, dict | list] = { + "@context": { + "dct": "http://purl.org/dc/terms/", + "gnt": "http://genenetwork.org/term/", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "skos": "http://www.w3.org/2004/02/skos/core#", + "symbol": "gnt:symbol", + "species": "gnt:species", + "taxonomic_id": "skos:notation", + "gene_id": "gnt:hasGeneId", + "pubmed_id": "dct:references", + "created": "dct:created", + "comment": "rdfs:comment", + "version": "dct:hasVersion", + } + } + data: list[dict[str, int | str]] = [] + for entry in sparql_query(query=query, endpoint=sparql_uri, format_type="json"): + data.append( + { + key: int(metadata.get("value")) + if metadata.get("value").isdigit() + else metadata.get("value") + for key, metadata in entry.items() + } + ) + results["data"] = data + return results + + +def delete_wiki_entries_by_id( + wiki_id: int, + sparql_user: str, + sparql_password: str, + sparql_auth_uri: str, + graph: str = "<http://genenetwork.org>", +) -> str: + """Delete all wiki entries associated with a given ID.""" + query = Template( + """ +$prefix + +DELETE WHERE { + GRAPH $graph { + ?comment dct:identifier \"$wiki_id\"^^xsd:integer . + ?comment ?p ?o . + } +} +""" + ).substitute(prefix=RDF_PREFIXES, graph=graph, wiki_id=wiki_id) + return update_rdf( + query=query, sparql_user=sparql_user, sparql_password=sparql_password, sparql_auth_uri=sparql_auth_uri, |
