diff options
-rw-r--r-- | gn3/db_utils.py | 18 | ||||
-rwxr-xr-x | scripts/index-genenetwork | 13 |
2 files changed, 19 insertions, 12 deletions
diff --git a/gn3/db_utils.py b/gn3/db_utils.py index 862d76c..4827358 100644 --- a/gn3/db_utils.py +++ b/gn3/db_utils.py @@ -7,20 +7,21 @@ import xapian from flask import current_app -def parse_db_url() -> Tuple: +def parse_db_url(sql_uri: str) -> Tuple: """function to parse SQL_URI env variable note:there\ is a default value for SQL_URI so a tuple result is\ always expected""" - parsed_db = urlparse(current_app.config["SQL_URI"]) + parsed_db = urlparse(sql_uri) return ( parsed_db.hostname, parsed_db.username, parsed_db.password, parsed_db.path[1:], parsed_db.port) # This function is deprecated. Use database_connection instead. -def database_connector() -> mdb.Connection: +def database_connector(sql_uri: str = "") -> mdb.Connection: """function to create db connector""" - host, user, passwd, db_name, db_port = parse_db_url() + host, user, passwd, db_name, db_port = parse_db_url( + sql_uri or current_app.config["SQL_URI"]) return mdb.connect(host, user, passwd, db_name, port=(db_port or 3306)) @@ -33,10 +34,15 @@ class Connection(Protocol): ... +## We need to decouple current_app from this module and function, but since this +## function is used throughout the code, that will require careful work to update +## all the code to pass the `sql_uri` argument, and make it a compulsory argument +## rather than its current optional state. @contextlib.contextmanager -def database_connection() -> Iterator[Connection]: +def database_connection(sql_uri: str = "") -> Iterator[Connection]: """Connect to MySQL database.""" - host, user, passwd, db_name, port = parse_db_url() + host, user, passwd, db_name, port = parse_db_url( + sql_uri or current_app.config["SQL_URI"]) connection = mdb.connect(db=db_name, user=user, passwd=passwd or '', diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork index b92fa5e..401ae1a 100755 --- a/scripts/index-genenetwork +++ b/scripts/index-genenetwork @@ -299,12 +299,12 @@ def worker_queue(number_of_workers: int = os.cpu_count() or 1) -> Generator: process.join() -def index_query(index_function: Callable, query: SQLQuery, xapian_build_directory: pathlib.Path, start: int = 0) -> None: +def index_query(index_function: Callable, query: SQLQuery, xapian_build_directory: pathlib.Path, sql_uri: str, start: int = 0) -> None: """Run SQL query, and index its results for Xapian.""" i = start try: with worker_queue() as spawn_worker: - with database_connection() as conn: + with database_connection(sql_uri) as conn: for chunk in group(query_sql(conn, serialize_sql( # KLUDGE: MariaDB does not allow an offset # without a limit. So, set limit to a "high" @@ -325,7 +325,7 @@ def index_query(index_function: Callable, query: SQLQuery, xapian_build_director except MySQLdb._exceptions.OperationalError: logging.warning("Reopening connection to recovering from SQL operational error", exc_info=True) - index_query(index_function, query, xapian_build_directory, i) + index_query(index_function, query, xapian_build_directory, sql_uri, i) @contextlib.contextmanager @@ -352,17 +352,18 @@ def xapian_compact(combined_index: pathlib.Path, indices: List[pathlib.Path]) -> @click.command(help="Index GeneNetwork data and build Xapian search index in XAPIAN_DIRECTORY.") @click.argument("xapian_directory") +@click.argument("sql_uri") # pylint: disable=missing-function-docstring -def main(xapian_directory: str) -> None: +def main(xapian_directory: str, sql_uri: str) -> None: logging.basicConfig(level=os.environ.get("LOGLEVEL", "DEBUG"), format='%(relativeCreated)s: %(levelname)s: %(message)s') pathlib.Path(xapian_directory).mkdir(exist_ok=True) with temporary_directory("combined", xapian_directory) as combined_index: with temporary_directory("build", xapian_directory) as xapian_build_directory: logging.info("Indexing genes") - index_query(index_genes, genes_query, xapian_build_directory) + index_query(index_genes, genes_query, xapian_build_directory, sql_uri) logging.info("Indexing phenotypes") - index_query(index_phenotypes, phenotypes_query, xapian_build_directory) + index_query(index_phenotypes, phenotypes_query, xapian_build_directory, sql_uri) logging.info("Combining and compacting indices") xapian_compact(combined_index, list(xapian_build_directory.iterdir())) for child in combined_index.iterdir(): |