about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2024-08-06 12:11:16 -0500
committer: Frederick Muriuki Muriithi, 2024-08-06 12:11:16 -0500
commit: ac319bc2b2ddb4a741169ff5913a785e432c7dd5 (patch)
tree: fdbef59b43341ccd2cb889a27d5ee44dc1a30f71 /scripts
parent: e852490b41afc6b765be3a609a84f887a7b2df6c (diff)
download: gn-uploader-ac319bc2b2ddb4a741169ff5913a785e432c7dd5.tar.gz
Pass logger on to inner functions
Pass the logger forward to inner functions to help with debugging things.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/rqtl2/install_genotypes.py | 94
1 file changed, 60 insertions, 34 deletions
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index dffd9d8..1eb3b75 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -19,10 +19,13 @@ from scripts.rqtl2.entry import build_main
 from scripts.rqtl2.cli_parser import add_common_arguments
 from scripts.cli_parser import init_cli_parser, add_global_data_arguments
 
-def insert_markers(dbconn: mdb.Connection,
-                   speciesid: int,
-                   markers: tuple[str, ...],
-                   pmapdata: Optional[Iterator[dict]]) -> int:
+def insert_markers(
+        dbconn: mdb.Connection,
+        speciesid: int,
+        markers: tuple[str, ...],
+        pmapdata: Optional[Iterator[dict]],
+        _logger: Logger
+) -> int:
     """Insert genotype and genotype values into the database."""
     mdata = reduce(#type: ignore[var-annotated]
         lambda acc, row: ({#type: ignore[arg-type, return-value]
@@ -48,9 +51,12 @@ def insert_markers(dbconn: mdb.Connection,
                 } for marker in markers}.values()))
         return cursor.rowcount
 
-def insert_individuals(dbconn: mdb.Connection,
-                       speciesid: int,
-                       individuals: tuple[str, ...]) -> int:
+def insert_individuals(
+        dbconn: mdb.Connection,
+        speciesid: int,
+        individuals: tuple[str, ...],
+        _logger: Logger
+) -> int:
     """Insert individuals/samples into the database."""
     with dbconn.cursor() as cursor:
         cursor.executemany(
@@ -61,10 +67,13 @@ def insert_individuals(dbconn: mdb.Connection,
                   for individual in individuals))
         return cursor.rowcount
 
-def cross_reference_individuals(dbconn: mdb.Connection,
-                                speciesid: int,
-                                populationid: int,
-                                individuals: tuple[str, ...]) -> int:
+def cross_reference_individuals(
+        dbconn: mdb.Connection,
+        speciesid: int,
+        populationid: int,
+        individuals: tuple[str, ...],
+        _logger: Logger
+) -> int:
     """Cross reference any inserted individuals."""
     with dbconn.cursor(cursorclass=DictCursor) as cursor:
         paramstr = ", ".join(["%s"] * len(individuals))
@@ -80,11 +89,13 @@ def cross_reference_individuals(dbconn: mdb.Connection,
             tuple(ids))
         return cursor.rowcount
 
-def insert_genotype_data(dbconn: mdb.Connection,
-                         speciesid: int,
-                         genotypes: tuple[dict, ...],
-                         individuals: tuple[str, ...]) -> tuple[
-                             int, tuple[dict, ...]]:
+def insert_genotype_data(
+        dbconn: mdb.Connection,
+        speciesid: int,
+        genotypes: tuple[dict, ...],
+        individuals: tuple[str, ...],
+        _logger: Logger
+) -> tuple[int, tuple[dict, ...]]:
     """Insert the genotype data values into the database."""
     with dbconn.cursor(cursorclass=DictCursor) as cursor:
         paramstr = ", ".join(["%s"] * len(individuals))
@@ -120,11 +131,14 @@ def insert_genotype_data(dbconn: mdb.Connection,
             "markerid": row["markerid"]
         } for row in data)
 
-def cross_reference_genotypes(dbconn: mdb.Connection,
-                              speciesid: int,
-                              datasetid: int,
-                              dataids: tuple[dict, ...],
-                              gmapdata: Optional[Iterator[dict]]) -> int:
+def cross_reference_genotypes(
+        dbconn: mdb.Connection,
+        speciesid: int,
+        datasetid: int,
+        dataids: tuple[dict, ...],
+        gmapdata: Optional[Iterator[dict]],
+        _logger: Logger
+) -> int:
     """Cross-reference the data to the relevant dataset."""
     _rows, markers, mdata = reduce(#type: ignore[var-annotated]
         lambda acc, row: (#type: ignore[return-value,arg-type]
@@ -141,20 +155,30 @@ def cross_reference_genotypes(dbconn: mdb.Connection,
 
     with dbconn.cursor(cursorclass=DictCursor) as cursor:
         paramstr = ", ".join(["%s"] * len(markers))
-        cursor.execute("SELECT Id, Name FROM Geno "
-                       f"WHERE SpeciesId=%s AND Name IN ({paramstr})",
-                       (speciesid,) + markers)
+        insertparams = (speciesid,) + markers
+        selectquery = ("SELECT Id, Name FROM Geno "
+                 f"WHERE SpeciesId=%s AND Name IN ({paramstr})")
+        _logger.debug(
+            "The select query was\n\t%s\n\nwith the parameters\n\t%s",
+            selectquery,
+            (speciesid,) + markers)
+        cursor.execute(query, insertparams)
         markersdict = {row["Id"]: row["Name"] for row in cursor.fetchall()}
-        cursor.executemany(
+        insertquery = (
             "INSERT INTO GenoXRef(GenoFreezeId, GenoId, DataId, cM) "
             "VALUES(%(datasetid)s, %(markerid)s, %(dataid)s, %(pos)s) "
-            "ON DUPLICATE KEY UPDATE GenoFreezeId=GenoFreezeId",
-            tuple({
-                **row,
-                "datasetid": datasetid,
-                "pos": mdata.get(markersdict.get(
-                    row.get("markerid"), "nosuchkey"), {}).get("pos")
-            } for row in dataids))
+            "ON DUPLICATE KEY UPDATE GenoFreezeId=GenoFreezeId")
+        insertparams = tuple({
+            **row,
+            "datasetid": datasetid,
+            "pos": mdata.get(markersdict.get(
+                row.get("markerid"), "nosuchkey"), {}).get("pos")
+        } for row in dataids)
+        _logger.debug(
+            "The insert query was\n\t%s\n\nwith the parameters\n\t%s",
+            insertquery,
+            (speciesid,) + markers)
+        cursor.executemany(insertquery, insertparams)
         return cursor.rowcount
 
 def install_genotypes(#pylint: disable=[too-many-arguments, too-many-locals]
@@ -189,7 +213,8 @@ def install_genotypes(#pylint: disable=[too-many-arguments, too-many-locals]
                     speciesid,
                     tuple(key for key in batch[0].keys() if key != "id"),
                     (rqtl2.file_data(zfile, "pmap", cdata) if "pmap" in cdata
-                     else None))
+                     else None),
+                    logger)
                 individuals = tuple(row["id"] for row in batch)
                 insert_individuals(dbconn, speciesid, individuals)
                 cross_reference_individuals(
@@ -202,7 +227,8 @@ def install_genotypes(#pylint: disable=[too-many-arguments, too-many-locals]
                     datasetid,
                     dataids,
                     (rqtl2.file_data(zfile, "gmap", cdata)
-                     if "gmap" in cdata else None))
+                     if "gmap" in cdata else None),
+                    logger)
                 count = count + len(batch)
         except rqtl2.InvalidFormat as exc:
             logger.error(str(exc))