feat: add sparql query to get wikidata

author: John Nduli 2024-06-14 20:56:05 +0300
committer: BonfaceKilz 2024-07-03 14:24:01 +0300
commit: 189d90ef44317784c1d884943113592e10b27493 (patch)
tree: 45a537e370b4b329c4c5f1753300960e4ebb34eb /scripts
parent: 9f27bdc3ca41e09a090093bc36f48851fcc4fc42 (diff)
download: genenetwork3-189d90ef44317784c1d884943113592e10b27493.tar.gz
1 files changed, 37 insertions, 0 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index d1eaf6f..63d730a 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -201,6 +201,36 @@ _:node rdf:type gnc:GNWikiEntry ;
     return cache
 
 
+def build_wiki_cache(sparql_uri: str) -> dict:
+    """Build a lookup of NCBI wiki comments keyed by species and symbol.
+
+    Runs a SELECT query against the SPARQL endpoint at ``sparql_uri`` for
+    every node typed ``gnc:NCBIWikiEntry`` and collects its ``rdfs:comment``
+    together with the species short name and the symbol label it hangs off.
+
+    Args:
+        sparql_uri: URI handed to ``SPARQLWrapper`` as the query endpoint.
+
+    Returns:
+        A dict mapping ``(speciesName, symbolName)`` tuples to comment text.
+        When several result rows share a key, their distinct comments are
+        joined with newlines, in the order the endpoint returned them.
+
+    Raises:
+        TypeError: if the converted query result is not a dict.
+    """
+    sparql = SPARQLWrapper(sparql_uri)
+    sparql.setReturnFormat(JSON)
+    query = """
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX gnt: <http://genenetwork.org/term/>
+PREFIX gnc: <http://genenetwork.org/category/>
+
+SELECT * WHERE {
+    ?symbol rdfs:comment _:node ;
+            rdfs:label ?symbolName .
+_:node rdf:type gnc:NCBIWikiEntry ;
+       gnt:belongsToSpecies ?species ;
+       rdfs:comment ?comment .
+?species gnt:shortName ?speciesName .
+}
+"""
+    sparql.setQuery(query)
+    results = sparql.queryAndConvert()
+    if not isinstance(results, dict):
+        raise TypeError(f"Expected results to be a dict but found {type(results)}")
+    # Every matching comment is its own binding, so one (species, symbol)
+    # pair can occur many times. Gather all of its comments instead of
+    # letting later rows overwrite earlier ones. The inner dict serves as an
+    # insertion-ordered set so repeated rows do not duplicate text.
+    grouped = {}
+    for entry in results["results"]["bindings"]:
+        key = (entry["speciesName"]["value"], entry["symbolName"]["value"])
+        grouped.setdefault(key, {})[entry["comment"]["value"]] = None
+    return {key: "\n".join(comments) for key, comments in grouped.items()}
+
+
 def hash_generif_graph(sparql_uri: str):
     sparql = SPARQLWrapper(sparql_uri)
     sparql.setReturnFormat(JSON)
@@ -260,6 +290,13 @@ def index_rif_comments(species: str, symbol: str, rdfcache: dict):
         termgenerator.index_text(entry, 0, "XRF")
 
 
+@curry(2)
+def index_wiki_comments(species, symbol):
+    """Index the cached wiki comment for one species/symbol pair.
+
+    Looks up ``(species, symbol)`` in ``wikicache`` and, when a non-empty
+    entry is found, passes it to ``termgenerator.index_text`` with the
+    arguments ``0`` and ``"XRF"``. Does nothing when there is no entry.
+
+    NOTE(review): ``wikicache`` is neither a parameter nor assigned here, so
+    it has to be provided by an enclosing scope -- confirm it is defined
+    before this is called. ``"XRF"`` is also the value ``index_rif_comments``
+    passes, so wiki and RIF text presumably end up under the same prefix --
+    confirm that is intended.
+    """
+    comment = wikicache.get((species, symbol))
+    if not comment:
+        return
+    termgenerator.index_text(comment, 0, "XRF")
+
 index_text_without_positions = lambda text: termgenerator.index_text_without_positions(text)
 index_authors = lambda authors: termgenerator.index_text(authors, 0, "A")
 index_species = lambda species: termgenerator.index_text_without_positions(species, 0, "XS")
author	John Nduli	2024-06-14 20:56:05 +0300
committer	BonfaceKilz	2024-07-03 14:24:01 +0300
commit	189d90ef44317784c1d884943113592e10b27493 (patch)
tree	45a537e370b4b329c4c5f1753300960e4ebb34eb /scripts
parent	9f27bdc3ca41e09a090093bc36f48851fcc4fc42 (diff)
download	genenetwork3-189d90ef44317784c1d884943113592e10b27493.tar.gz