about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- gn3/db_utils.py 18
-rwxr-xr-x scripts/index-genenetwork 13
2 files changed, 19 insertions, 12 deletions
diff --git a/gn3/db_utils.py b/gn3/db_utils.py
index 862d76c..4827358 100644
--- a/gn3/db_utils.py
+++ b/gn3/db_utils.py
@@ -7,20 +7,21 @@ import xapian
from flask import current_app
-def parse_db_url() -> Tuple:
+def parse_db_url(sql_uri: str) -> Tuple:
"""function to parse SQL_URI env variable note:there\
is a default value for SQL_URI so a tuple result is\
always expected"""
- parsed_db = urlparse(current_app.config["SQL_URI"])
+ parsed_db = urlparse(sql_uri)
return (
parsed_db.hostname, parsed_db.username, parsed_db.password,
parsed_db.path[1:], parsed_db.port)
# This function is deprecated. Use database_connection instead.
-def database_connector() -> mdb.Connection:
+def database_connector(sql_uri: str = "") -> mdb.Connection:
"""function to create db connector"""
- host, user, passwd, db_name, db_port = parse_db_url()
+ host, user, passwd, db_name, db_port = parse_db_url(
+ sql_uri or current_app.config["SQL_URI"])
return mdb.connect(host, user, passwd, db_name, port=(db_port or 3306))
@@ -33,10 +34,15 @@ class Connection(Protocol):
...
+## We need to decouple current_app from this module and function, but since this
+## function is used throughout the code, that will require careful work to update
+## all the code to pass the `sql_uri` argument, and make it a compulsory argument
+## rather than its current optional state.
@contextlib.contextmanager
-def database_connection() -> Iterator[Connection]:
+def database_connection(sql_uri: str = "") -> Iterator[Connection]:
"""Connect to MySQL database."""
- host, user, passwd, db_name, port = parse_db_url()
+ host, user, passwd, db_name, port = parse_db_url(
+ sql_uri or current_app.config["SQL_URI"])
connection = mdb.connect(db=db_name,
user=user,
passwd=passwd or '',
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index b92fa5e..401ae1a 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -299,12 +299,12 @@ def worker_queue(number_of_workers: int = os.cpu_count() or 1) -> Generator:
process.join()
-def index_query(index_function: Callable, query: SQLQuery, xapian_build_directory: pathlib.Path, start: int = 0) -> None:
+def index_query(index_function: Callable, query: SQLQuery, xapian_build_directory: pathlib.Path, sql_uri: str, start: int = 0) -> None:
"""Run SQL query, and index its results for Xapian."""
i = start
try:
with worker_queue() as spawn_worker:
- with database_connection() as conn:
+ with database_connection(sql_uri) as conn:
for chunk in group(query_sql(conn, serialize_sql(
# KLUDGE: MariaDB does not allow an offset
# without a limit. So, set limit to a "high"
@@ -325,7 +325,7 @@ def index_query(index_function: Callable, query: SQLQuery, xapian_build_director
except MySQLdb._exceptions.OperationalError:
logging.warning("Reopening connection to recovering from SQL operational error",
exc_info=True)
- index_query(index_function, query, xapian_build_directory, i)
+ index_query(index_function, query, xapian_build_directory, sql_uri, i)
@contextlib.contextmanager
@@ -352,17 +352,18 @@ def xapian_compact(combined_index: pathlib.Path, indices: List[pathlib.Path]) ->
@click.command(help="Index GeneNetwork data and build Xapian search index in XAPIAN_DIRECTORY.")
@click.argument("xapian_directory")
+@click.argument("sql_uri")
# pylint: disable=missing-function-docstring
-def main(xapian_directory: str) -> None:
+def main(xapian_directory: str, sql_uri: str) -> None:
logging.basicConfig(level=os.environ.get("LOGLEVEL", "DEBUG"),
format='%(relativeCreated)s: %(levelname)s: %(message)s')
pathlib.Path(xapian_directory).mkdir(exist_ok=True)
with temporary_directory("combined", xapian_directory) as combined_index:
with temporary_directory("build", xapian_directory) as xapian_build_directory:
logging.info("Indexing genes")
- index_query(index_genes, genes_query, xapian_build_directory)
+ index_query(index_genes, genes_query, xapian_build_directory, sql_uri)
logging.info("Indexing phenotypes")
- index_query(index_phenotypes, phenotypes_query, xapian_build_directory)
+ index_query(index_phenotypes, phenotypes_query, xapian_build_directory, sql_uri)
logging.info("Combining and compacting indices")
xapian_compact(combined_index, list(xapian_build_directory.iterdir()))
for child in combined_index.iterdir():