about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x scripts/index-genenetwork 37
1 files changed, 19 insertions, 18 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index 029712b..f79bfc1 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -168,12 +168,9 @@ def locked_xapian_writable_database(path: pathlib.Path) -> xapian.WritableDataba
         db.close()
 
 
-
-def build_rif_cache():
+def build_rif_cache(sparql_uri: str):
     cache = {}
-    sparql = SPARQLWrapper(
-        "http://localhost:8982/sparql"
-    )
+    sparql = SPARQLWrapper(sparql_uri)
     sparql.setReturnFormat(JSON)
     query = """
 PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
@@ -198,10 +195,8 @@ _:node rdf:type gnc:GNWikiEntry ;
     return cache
 
 
-def hash_generif_graph():
-    sparql = SPARQLWrapper(
-        "http://localhost:8982/sparql"
-    )
+def hash_generif_graph(sparql_uri: str):
+    sparql = SPARQLWrapper(sparql_uri)
     sparql.setReturnFormat(JSON)
     query = """
 PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
@@ -395,13 +390,14 @@ def worker_queue(number_of_workers: int = os.cpu_count() or 1) -> Generator:
 
 
 def index_query(index_function: Callable, query: SQLQuery,
-                xapian_build_directory: pathlib.Path, sql_uri: str, start: int = 0) -> None:
+                xapian_build_directory: pathlib.Path, sql_uri: str,
+                sparql_uri: str, start: int = 0) -> None:
     """Run SQL query, and index its results for Xapian."""
     i = start
     try:
         with worker_queue() as spawn_worker:
             global rdfcache
-            rdfcache = build_rif_cache()
+            rdfcache = build_rif_cache(sparql_uri)
             with database_connection(sql_uri) as conn:
                 for chunk in group(query_sql(conn, serialize_sql(
                         # KLUDGE: MariaDB does not allow an offset
@@ -451,8 +447,10 @@ def xapian_compact(combined_index: pathlib.Path, indices: List[pathlib.Path]) ->
 @click.command(help="Verify checksums and return True when the data has been changed.")
 @click.argument("xapian_directory")
 @click.argument("sql_uri")
+@click.argument("sparql_uri")
 def is_data_modified(xapian_directory: str,
-                     sql_uri: str) -> None:
+                     sql_uri: str,
+                     sparql_uri: str) -> None:
     dir_ = pathlib.Path(xapian_directory)
     with locked_xapian_writable_database(dir_) as db, database_connection(sql_uri) as conn:
         checksums = " ".join([
@@ -461,15 +459,18 @@ def is_data_modified(xapian_directory: str,
                     conn,
                     f"CHECKSUM TABLE {', '.join(db.get_metadata('tables').decode().split())}")
         ])
-        click.echo(db.get_metadata("generif-checksum").decode() == hash_generif_graph() and
-                   db.get_metadata("checksums").decode() == checksums)
+        click.echo(
+            db.get_metadata("generif-checksum").decode() == hash_generif_graph(sparql_uri) and
+            db.get_metadata("checksums").decode() == checksums)
 
 
 @click.command(help="Index GeneNetwork data and build Xapian search index in XAPIAN_DIRECTORY.")
 @click.argument("xapian_directory")
 @click.argument("sql_uri")
+@click.argument("sparql_uri")
 # pylint: disable=missing-function-docstring
-def create_xapian_index(xapian_directory: str, sql_uri: str) -> None:
+def create_xapian_index(xapian_directory: str, sql_uri: str,
+                        sparql_uri: str) -> None:
     logging.basicConfig(level=os.environ.get("LOGLEVEL", "DEBUG"),
                         format='%(relativeCreated)s: %(levelname)s: %(message)s')
 
@@ -488,9 +489,9 @@ def create_xapian_index(xapian_directory: str, sql_uri: str) -> None:
     with temporary_directory("combined", build_directory) as combined_index:
         with temporary_directory("build", build_directory) as xapian_build_directory:
             logging.info("Indexing genes")
-            index_query(index_genes, genes_query, xapian_build_directory, sql_uri)
+            index_query(index_genes, genes_query, xapian_build_directory, sql_uri, sparql_uri)
             logging.info("Indexing phenotypes")
-            index_query(index_phenotypes, phenotypes_query, xapian_build_directory, sql_uri)
+            index_query(index_phenotypes, phenotypes_query, xapian_build_directory, sql_uri, sparql_uri)
             logging.info("Combining and compacting indices")
             xapian_compact(combined_index, list(xapian_build_directory.iterdir()))
             logging.info("Writing table checksums into index")
@@ -507,7 +508,7 @@ def create_xapian_index(xapian_directory: str, sql_uri: str) -> None:
                 db.set_metadata("tables", " ".join(tables))
                 db.set_metadata("checksums", " ".join(checksums))
                 logging.info("Writing generif checksums into index")
-                db.set_metadata("generif-checksum", hash_generif_graph().encode())
+                db.set_metadata("generif-checksum", hash_generif_graph(sparql_uri).encode())
         for child in combined_index.iterdir():
             shutil.move(child, pathlib.Path(xapian_directory) / child.name)
     build_directory.rmdir()