From 189d90ef44317784c1d884943113592e10b27493 Mon Sep 17 00:00:00 2001
From: John Nduli
Date: Fri, 14 Jun 2024 20:56:05 +0300
Subject: feat: add sparql query to get wikidata

---
 scripts/index-genenetwork | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'scripts')

diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index d1eaf6f..63d730a 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -201,6 +201,36 @@ _:node rdf:type gnc:GNWikiEntry ;
     return cache
 
 
+def build_wiki_cache(sparql_uri: str):
+    cache = {}
+    sparql = SPARQLWrapper(sparql_uri)
+    sparql.setReturnFormat(JSON)
+    query = """
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX gnt: <http://genenetwork.org/term/>
+PREFIX gnc: <http://genenetwork.org/category/>
+
+SELECT * WHERE {
+    ?symbol rdfs:comment _:node ;
+            rdfs:label ?symbolName .
+_:node rdf:type gnc:NCBIWikiEntry ;
+       gnt:belongsToSpecies ?species ;
+       rdfs:comment ?comment .
+?species gnt:shortName ?speciesName .
+}
+"""
+    sparql.setQuery(query)
+    results = sparql.queryAndConvert()
+    if not isinstance(results, dict):
+        raise TypeError(f"Expected results to be a dict but found {type(results)}")
+    bindings = results["results"]["bindings"]
+    for entry in bindings :
+        x = (entry["speciesName"]["value"], entry["symbolName"]["value"],)
+        cache[x] = entry["comment"]["value"]
+    return cache
+
+
 def hash_generif_graph(sparql_uri: str):
     sparql = SPARQLWrapper(sparql_uri)
     sparql.setReturnFormat(JSON)
@@ -260,6 +290,13 @@ def index_rif_comments(species: str, symbol: str, rdfcache: dict):
         termgenerator.index_text(entry, 0, "XRF")
 
 
+@curry(2)
+def index_wiki_comments(species, symbol):
+    key = (species, symbol,)
+    entry = wikicache.get(key)
+    if entry:
+        termgenerator.index_text(entry, 0, "XRF")
+
 index_text_without_positions = lambda text: termgenerator.index_text_without_positions(text)
 index_authors = lambda authors: termgenerator.index_text(authors, 0, "A")
 index_species = lambda species: termgenerator.index_text_without_positions(species, 0, "XS")
-- 
cgit v1.2.3