diff options
author | Munyoki Kilyungi | 2024-06-11 13:11:36 +0300 |
---|---|---|
committer | BonfaceKilz | 2024-06-12 19:21:50 +0300 |
commit | 156cab9938e65b585d461a438e8d6456f49b85e9 (patch) | |
tree | 77bbeacdb4bd198f088775b8739b798224bcb101 /scripts | |
parent | a62798d0f59e953dbeba69f370f8d8045bd4becf (diff) | |
download | genenetwork3-156cab9938e65b585d461a438e8d6456f49b85e9.tar.gz |
Rework how the indexes are built.
Right now, the checks are done in Guix's build expression. This moves
that work to the index-genenetwork script.
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/index-genenetwork | 43 |
1 files changed, 31 insertions, 12 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index f291f6b..029712b 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -448,8 +448,11 @@ def xapian_compact(combined_index: pathlib.Path, indices: List[pathlib.Path]) ->
         db.close()
 
 
-def verify_checksums(xapian_directory: str,
-                     sql_uri: str) -> bool:
+@click.command(help="Verify checksums and return True when the data has been changed.")
+@click.argument("xapian_directory")
+@click.argument("sql_uri")
+def is_data_modified(xapian_directory: str,
+                     sql_uri: str) -> None:
     dir_ = pathlib.Path(xapian_directory)
     with locked_xapian_writable_database(dir_) as db, database_connection(sql_uri) as conn:
         checksums = " ".join([
@@ -458,28 +461,32 @@ def verify_checksums(xapian_directory: str,
                 conn,
                 f"CHECKSUM TABLE {', '.join(db.get_metadata('tables').decode().split())}")
         ])
-        return (db.get_metadata("generif-checksum").decode() == hash_generif_graph() and
-                db.get_metadata("checksums").decode() == checksums)
+        click.echo(db.get_metadata("generif-checksum").decode() == hash_generif_graph() and
+                   db.get_metadata("checksums").decode() == checksums)
 
 
 @click.command(help="Index GeneNetwork data and build Xapian search index in XAPIAN_DIRECTORY.")
 @click.argument("xapian_directory")
 @click.argument("sql_uri")
 # pylint: disable=missing-function-docstring
-def main(xapian_directory: str, sql_uri: str) -> None:
+def create_xapian_index(xapian_directory: str, sql_uri: str) -> None:
     logging.basicConfig(level=os.environ.get("LOGLEVEL", "DEBUG"),
                         format='%(relativeCreated)s: %(levelname)s: %(message)s')
 
+    logging.info("Verifying the checksums")
+
+    build_directory = pathlib.Path(xapian_directory) / "build"
+
     # Ensure no other build process is running.
-    if pathlib.Path(xapian_directory).exists():
+    if build_directory.exists():
         logging.error("Build directory %s already exists; "
                       "perhaps another build process is running.",
-                      xapian_directory)
+                      build_directory)
         sys.exit(1)
 
-    pathlib.Path(xapian_directory).mkdir()
-    with temporary_directory("combined", xapian_directory) as combined_index:
-        with temporary_directory("build", xapian_directory) as xapian_build_directory:
+    build_directory.mkdir()
+    with temporary_directory("combined", build_directory) as combined_index:
+        with temporary_directory("build", build_directory) as xapian_build_directory:
             logging.info("Indexing genes")
             index_query(index_genes, genes_query, xapian_build_directory, sql_uri)
             logging.info("Indexing phenotypes")
@@ -499,11 +506,23 @@ def main(xapian_directory: str, sql_uri: str) -> None:
                     ]
                 db.set_metadata("tables", " ".join(tables))
                 db.set_metadata("checksums", " ".join(checksums))
+                logging.info("Writing generif checksums into index")
+                db.set_metadata("generif-checksum", hash_generif_graph().encode())
         for child in combined_index.iterdir():
-            shutil.move(child, xapian_directory)
+            shutil.move(child, pathlib.Path(xapian_directory) / child.name)
+    build_directory.rmdir()
     logging.info("Index built")
 
 
+@click.group()
+def cli():
+    pass
+
+
+cli.add_command(is_data_modified)
+cli.add_command(create_xapian_index)
+
+
 if __name__ == "__main__":
     # pylint: disable=no-value-for-parameter
-    main()
+    cli()