aboutsummaryrefslogtreecommitdiff
path: root/scripts/index-genenetwork
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2023-04-04 10:07:03 +0300
committer Frederick Muriuki Muriithi 2023-04-05 14:51:04 +0300
commit 3d873435f0d464864d4d691d6be4db40931fac05 (patch)
tree 7d311b19234d46d3a0e75ee6404b64cdfc0ee84b /scripts/index-genenetwork
parent 87b439265ad44b90742e69a992e2fd66fdc48c49 (diff)
download genenetwork3-3d873435f0d464864d4d691d6be4db40931fac05.tar.gz
Enable use of `database_connection` in scripts without current_app
There is a need to run external scripts using the same configuration as the application, but without coupling the scripts to the application. To support this, we provide the needed configuration directly on the CLI, and modify the existing `gn3.db_utils.database_connection` function so that it works whether or not it is coupled to the app.
Diffstat (limited to 'scripts/index-genenetwork')
-rwxr-xr-x scripts/index-genenetwork 13
1 files changed, 7 insertions, 6 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index b92fa5e..401ae1a 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -299,12 +299,12 @@ def worker_queue(number_of_workers: int = os.cpu_count() or 1) -> Generator:
process.join()
-def index_query(index_function: Callable, query: SQLQuery, xapian_build_directory: pathlib.Path, start: int = 0) -> None:
+def index_query(index_function: Callable, query: SQLQuery, xapian_build_directory: pathlib.Path, sql_uri: str, start: int = 0) -> None:
"""Run SQL query, and index its results for Xapian."""
i = start
try:
with worker_queue() as spawn_worker:
- with database_connection() as conn:
+ with database_connection(sql_uri) as conn:
for chunk in group(query_sql(conn, serialize_sql(
# KLUDGE: MariaDB does not allow an offset
# without a limit. So, set limit to a "high"
@@ -325,7 +325,7 @@ def index_query(index_function: Callable, query: SQLQuery, xapian_build_director
except MySQLdb._exceptions.OperationalError:
logging.warning("Reopening connection to recovering from SQL operational error",
exc_info=True)
- index_query(index_function, query, xapian_build_directory, i)
+ index_query(index_function, query, xapian_build_directory, sql_uri, i)
@contextlib.contextmanager
@@ -352,17 +352,18 @@ def xapian_compact(combined_index: pathlib.Path, indices: List[pathlib.Path]) ->
@click.command(help="Index GeneNetwork data and build Xapian search index in XAPIAN_DIRECTORY.")
@click.argument("xapian_directory")
+@click.argument("sql_uri")
# pylint: disable=missing-function-docstring
-def main(xapian_directory: str) -> None:
+def main(xapian_directory: str, sql_uri: str) -> None:
logging.basicConfig(level=os.environ.get("LOGLEVEL", "DEBUG"),
format='%(relativeCreated)s: %(levelname)s: %(message)s')
pathlib.Path(xapian_directory).mkdir(exist_ok=True)
with temporary_directory("combined", xapian_directory) as combined_index:
with temporary_directory("build", xapian_directory) as xapian_build_directory:
logging.info("Indexing genes")
- index_query(index_genes, genes_query, xapian_build_directory)
+ index_query(index_genes, genes_query, xapian_build_directory, sql_uri)
logging.info("Indexing phenotypes")
- index_query(index_phenotypes, phenotypes_query, xapian_build_directory)
+ index_query(index_phenotypes, phenotypes_query, xapian_build_directory, sql_uri)
logging.info("Combining and compacting indices")
xapian_compact(combined_index, list(xapian_build_directory.iterdir()))
for child in combined_index.iterdir():