diff options
author | John Nduli | 2024-06-14 20:56:05 +0300 |
---|---|---|
committer | BonfaceKilz | 2024-07-03 14:24:01 +0300 |
commit | 189d90ef44317784c1d884943113592e10b27493 (patch) | |
tree | 45a537e370b4b329c4c5f1753300960e4ebb34eb /scripts/index-genenetwork | |
parent | 9f27bdc3ca41e09a090093bc36f48851fcc4fc42 (diff) | |
download | genenetwork3-189d90ef44317784c1d884943113592e10b27493.tar.gz |
feat: add sparql query to get wikidata
Diffstat (limited to 'scripts/index-genenetwork')
-rwxr-xr-x | scripts/index-genenetwork | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index d1eaf6f..63d730a 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -201,6 +201,36 @@ _:node rdf:type gnc:GNWikiEntry ;
     return cache
 
 
+def build_wiki_cache(sparql_uri: str):
+    cache = {}
+    sparql = SPARQLWrapper(sparql_uri)
+    sparql.setReturnFormat(JSON)
+    query = """
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX gnt: <http://genenetwork.org/term/>
+PREFIX gnc: <http://genenetwork.org/category/>
+
+SELECT * WHERE {
+    ?symbol rdfs:comment _:node ;
+            rdfs:label ?symbolName .
+_:node rdf:type gnc:NCBIWikiEntry ;
+       gnt:belongsToSpecies ?species ;
+       rdfs:comment ?comment .
+?species gnt:shortName ?speciesName .
+}
+"""
+    sparql.setQuery(query)
+    results = sparql.queryAndConvert()
+    if not isinstance(results, dict):
+        raise TypeError(f"Expected results to be a dict but found {type(results)}")
+    bindings = results["results"]["bindings"]
+    for entry in bindings :
+        x = (entry["speciesName"]["value"], entry["symbolName"]["value"],)
+        cache[x] = entry["comment"]["value"]
+    return cache
+
+
 def hash_generif_graph(sparql_uri: str):
     sparql = SPARQLWrapper(sparql_uri)
     sparql.setReturnFormat(JSON)
@@ -260,6 +290,13 @@ def index_rif_comments(species: str, symbol: str, rdfcache: dict):
         termgenerator.index_text(entry, 0, "XRF")
 
 
+@curry(2)
+def index_wiki_comments(species, symbol):
+    key = (species, symbol,)
+    entry = wikicache.get(key)
+    if entry:
+        termgenerator.index_text(entry, 0, "XRF")
+
 index_text_without_positions = lambda text: termgenerator.index_text_without_positions(text)
 index_authors = lambda authors: termgenerator.index_text(authors, 0, "A")
 index_species = lambda species: termgenerator.index_text_without_positions(species, 0, "XS") |