diff options
Diffstat (limited to 'scripts/index-genenetwork')
-rwxr-xr-x | scripts/index-genenetwork | 37 |
1 files changed, 19 insertions, 18 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork index 029712b..f79bfc1 100755 --- a/scripts/index-genenetwork +++ b/scripts/index-genenetwork @@ -168,12 +168,9 @@ def locked_xapian_writable_database(path: pathlib.Path) -> xapian.WritableDataba db.close() - -def build_rif_cache(): +def build_rif_cache(sparql_uri: str): cache = {} - sparql = SPARQLWrapper( - "http://localhost:8982/sparql" - ) + sparql = SPARQLWrapper(sparql_uri) sparql.setReturnFormat(JSON) query = """ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> @@ -198,10 +195,8 @@ _:node rdf:type gnc:GNWikiEntry ; return cache -def hash_generif_graph(): - sparql = SPARQLWrapper( - "http://localhost:8982/sparql" - ) +def hash_generif_graph(sparql_uri: str): + sparql = SPARQLWrapper(sparql_uri) sparql.setReturnFormat(JSON) query = """ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> @@ -395,13 +390,14 @@ def worker_queue(number_of_workers: int = os.cpu_count() or 1) -> Generator: def index_query(index_function: Callable, query: SQLQuery, - xapian_build_directory: pathlib.Path, sql_uri: str, start: int = 0) -> None: + xapian_build_directory: pathlib.Path, sql_uri: str, + sparql_uri: str, start: int = 0) -> None: """Run SQL query, and index its results for Xapian.""" i = start try: with worker_queue() as spawn_worker: global rdfcache - rdfcache = build_rif_cache() + rdfcache = build_rif_cache(sparql_uri) with database_connection(sql_uri) as conn: for chunk in group(query_sql(conn, serialize_sql( # KLUDGE: MariaDB does not allow an offset @@ -451,8 +447,10 @@ def xapian_compact(combined_index: pathlib.Path, indices: List[pathlib.Path]) -> @click.command(help="Verify checksums and return True when the data has been changed.") @click.argument("xapian_directory") @click.argument("sql_uri") +@click.argument("sparql_uri") def is_data_modified(xapian_directory: str, - sql_uri: str) -> None: + sql_uri: str, + sparql_uri: str) -> None: dir_ = pathlib.Path(xapian_directory) with locked_xapian_writable_database(dir_) as db, database_connection(sql_uri) as conn: checksums = " ".join([ @@ -461,15 +459,18 @@ def is_data_modified(xapian_directory: str, conn, f"CHECKSUM TABLE {', '.join(db.get_metadata('tables').decode().split())}") ]) - click.echo(db.get_metadata("generif-checksum").decode() == hash_generif_graph() and - db.get_metadata("checksums").decode() == checksums) + click.echo( + db.get_metadata("generif-checksum").decode() == hash_generif_graph(sparql_uri) and + db.get_metadata("checksums").decode() == checksums) @click.command(help="Index GeneNetwork data and build Xapian search index in XAPIAN_DIRECTORY.") @click.argument("xapian_directory") @click.argument("sql_uri") +@click.argument("sparql_uri") # pylint: disable=missing-function-docstring -def create_xapian_index(xapian_directory: str, sql_uri: str) -> None: +def create_xapian_index(xapian_directory: str, sql_uri: str, + sparql_uri: str) -> None: logging.basicConfig(level=os.environ.get("LOGLEVEL", "DEBUG"), format='%(relativeCreated)s: %(levelname)s: %(message)s') @@ -488,9 +489,9 @@ def create_xapian_index(xapian_directory: str, sql_uri: str) -> None: with temporary_directory("combined", build_directory) as combined_index: with temporary_directory("build", build_directory) as xapian_build_directory: logging.info("Indexing genes") - index_query(index_genes, genes_query, xapian_build_directory, sql_uri) + index_query(index_genes, genes_query, xapian_build_directory, sql_uri, sparql_uri) logging.info("Indexing phenotypes") - index_query(index_phenotypes, phenotypes_query, xapian_build_directory, sql_uri) + index_query(index_phenotypes, phenotypes_query, xapian_build_directory, sql_uri, sparql_uri) logging.info("Combining and compacting indices") xapian_compact(combined_index, list(xapian_build_directory.iterdir())) logging.info("Writing table checksums into index") @@ -507,7 +508,7 @@ def create_xapian_index(xapian_directory: str, sql_uri: str) -> None: db.set_metadata("tables", " ".join(tables)) db.set_metadata("checksums", " ".join(checksums)) logging.info("Writing generif checksums into index") - db.set_metadata("generif-checksum", hash_generif_graph().encode()) + db.set_metadata("generif-checksum", hash_generif_graph(sparql_uri).encode()) for child in combined_index.iterdir(): shutil.move(child, pathlib.Path(xapian_directory) / child.name) build_directory.rmdir() |