aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2025-06-02 12:25:58 -0500
committerFrederick Muriuki Muriithi2025-06-02 12:32:09 -0500
commitf0b0e04bb6aa1744c802d4eb5cd1cb7c84c88b02 (patch)
tree3e1d10c3e053ba34248cf07702875de85b8d8d6d
parente3e097af4b92173e2bcb4771ccd1360fbe0e8cf6 (diff)
downloadgn-uploader-f0b0e04bb6aa1744c802d4eb5cd1cb7c84c88b02.tar.gz
Add some debugging statements.
-rw-r--r--scripts/load_phenotypes_to_db.py6
-rw-r--r--uploader/phenotypes/models.py5
2 files changed, 9 insertions, 2 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index adb076f..dbfafc7 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -36,7 +36,6 @@ def save_phenotypes(
filesdir: Path
) -> tuple[dict, ...]:
"""Read `phenofiles` and save the phenotypes therein."""
- logger.info("Saving new phenotypes.")
## TODO: Replace with something like this: ##
# phenofiles = control_data["phenocovar"] + control_data.get(
# "gn-metadata", {}).get("pheno", [])
@@ -194,7 +193,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
_outdir = Path(bundle.parent, f"bundle_{bundle.stem}")
with ZipFile(str(bundle), "r") as zfile:
_files = rqtl2.extract(zfile, _outdir)
- logger.info("Saving basic phenotype data.")
+ logger.info("Saving new phenotypes.")
_phenos = save_phenotypes(conn, _control_data, _outdir)
def __build_phenos_maps__(accumulator, current):
dataid, row = current
@@ -220,6 +219,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
for row in samples_by_species_and_population(
conn, _species["SpeciesId"], _population["Id"])}
# b. Save all the data items (DataIds are vibes), return new IDs
+ logger.info("Saving new phenotypes data.")
data = save_pheno_data(conn=conn,
dataidmap=dataidmap,
pheno_name2id=pheno_name2id,
@@ -227,6 +227,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
control_data=_control_data,
filesdir=_outdir)
# 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
+ logger.info("Cross-referencing new phenotypes to their data and publications.")
xrefs = cross_reference_phenotypes_publications_and_data(
conn, tuple(dataidmap.values()))
# 5. If standard errors and N exist, save them too
@@ -247,6 +248,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
filesdir=_outdir)
# 6. If entirely new data, update authorisations (break this down)
update_auth(_user, _species, _population, _dataset, _phenos)
+ logger.info("Updating authorisation.")
return 0
diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py
index eb926d8..f4d3529 100644
--- a/uploader/phenotypes/models.py
+++ b/uploader/phenotypes/models.py
@@ -336,12 +336,14 @@ def save_phenotypes_data(
}[table]
saved_data = tuple()
with conn.cursor(cursorclass=DictCursor) as cursor:
+ _count = 0
while True:
                batch = take(data, 5000)
if len(batch) == 0:
logger.warning("Got an empty batch. This needs investigation.")
break
+ logger.debug("Saving batch of %s items.", len(batch))
cursor.executemany(
(f"INSERT INTO {_table_details['table']}"
f"({_table_details['DataIdCol']}, StrainId, {_table_details['valueCol']}) "
@@ -359,5 +361,8 @@ def save_phenotypes_data(
for item in batch)
for single in items))
saved_data = saved_data + tuple(dict(row) for row in cursor.fetchall())
+ _count = _count + len(batch)
+
+ logger.debug("Saved a total of %s data rows", _count)
return saved_data