about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John Nduli 2024-06-14 20:56:05 +0300
committer BonfaceKilz 2024-07-03 14:24:01 +0300
commit 189d90ef44317784c1d884943113592e10b27493 (patch)
tree 45a537e370b4b329c4c5f1753300960e4ebb34eb
parent 9f27bdc3ca41e09a090093bc36f48851fcc4fc42 (diff)
download genenetwork3-189d90ef44317784c1d884943113592e10b27493.tar.gz
feat: add sparql query to get wikidata
-rwxr-xr-x scripts/index-genenetwork 37
1 file changed, 37 insertions, 0 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index d1eaf6f..63d730a 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -201,6 +201,36 @@ _:node rdf:type gnc:GNWikiEntry ;
return cache
def build_wiki_cache(sparql_uri: str):
    """Fetch NCBI wiki comments over SPARQL, keyed by species and symbol.

    Sends one fixed SELECT query to the endpoint at ``sparql_uri``.  The
    query asks for every node typed ``gnc:NCBIWikiEntry`` together with its
    comment, the label of the symbol the node is attached to, and the short
    name of the species it belongs to.

    Returns a dict mapping ``(speciesName, symbolName)`` tuples to the
    comment text.  If several result rows share a key, the row that comes
    later in the result set overwrites the earlier one.

    Raises ``TypeError`` when the converted query result is not a dict.
    """
    query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX gnt: <http://genenetwork.org/term/>
PREFIX gnc: <http://genenetwork.org/category/>

SELECT * WHERE {
 ?symbol rdfs:comment _:node ;
 rdfs:label ?symbolName .
_:node rdf:type gnc:NCBIWikiEntry ;
 gnt:belongsToSpecies ?species ;
 rdfs:comment ?comment .
?species gnt:shortName ?speciesName .
}
"""
    endpoint = SPARQLWrapper(sparql_uri)
    endpoint.setReturnFormat(JSON)
    endpoint.setQuery(query)
    response = endpoint.queryAndConvert()
    if not isinstance(response, dict):
        raise TypeError(f"Expected results to be a dict but found {type(response)}")
    # NOTE(review): one comment is kept per (species, symbol) pair; if a
    # symbol can carry several wiki entries only the last row survives --
    # confirm that this is intended.
    return {
        (row["speciesName"]["value"], row["symbolName"]["value"]): row["comment"]["value"]
        for row in response["results"]["bindings"]
    }
+
+
def hash_generif_graph(sparql_uri: str):
sparql = SPARQLWrapper(sparql_uri)
sparql.setReturnFormat(JSON)
@@ -260,6 +290,13 @@ def index_rif_comments(species: str, symbol: str, rdfcache: dict):
termgenerator.index_text(entry, 0, "XRF")
@curry(2)
def index_wiki_comments(species, symbol):
    """Index the cached wiki comment for ``(species, symbol)``, if there is one.

    Looks the pair up in ``wikicache`` and, when a non-empty entry is found,
    passes it to ``termgenerator.index_text`` with ``0`` and the ``"XRF"``
    prefix.  Does nothing when the pair is missing or its entry is falsy.

    NOTE(review): ``wikicache`` and ``termgenerator`` are not parameters;
    they are resolved as free names when the function runs.  The sibling
    ``index_rif_comments`` receives its cache as an argument -- confirm that
    ``wikicache`` is bound at module level before this is called.
    """
    comment = wikicache.get((species, symbol))
    if not comment:
        return
    termgenerator.index_text(comment, 0, "XRF")
+
def index_text_without_positions(text):
    """Forward ``text`` to ``termgenerator.index_text_without_positions``."""
    return termgenerator.index_text_without_positions(text)


def index_authors(authors):
    """Forward ``authors`` to ``termgenerator.index_text`` with ``0`` and prefix ``"A"``."""
    return termgenerator.index_text(authors, 0, "A")


def index_species(species):
    """Forward ``species`` to ``termgenerator.index_text_without_positions`` with ``0`` and prefix ``"XS"``."""
    return termgenerator.index_text_without_positions(species, 0, "XS")