aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2025-06-02 12:25:58 -0500
committerFrederick Muriuki Muriithi2025-06-02 12:32:09 -0500
commitf0b0e04bb6aa1744c802d4eb5cd1cb7c84c88b02 (patch)
tree3e1d10c3e053ba34248cf07702875de85b8d8d6d
parente3e097af4b92173e2bcb4771ccd1360fbe0e8cf6 (diff)
downloadgn-uploader-f0b0e04bb6aa1744c802d4eb5cd1cb7c84c88b02.tar.gz
Add some debugging statements.
-rw-r--r--scripts/load_phenotypes_to_db.py6
-rw-r--r--uploader/phenotypes/models.py5
2 files changed, 9 insertions, 2 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index adb076f..dbfafc7 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -36,7 +36,6 @@ def save_phenotypes(
filesdir: Path
) -> tuple[dict, ...]:
"""Read `phenofiles` and save the phenotypes therein."""
- logger.info("Saving new phenotypes.")
## TODO: Replace with something like this: ##
# phenofiles = control_data["phenocovar"] + control_data.get(
# "gn-metadata", {}).get("pheno", [])
@@ -194,7 +193,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
_outdir = Path(bundle.parent, f"bundle_{bundle.stem}")
with ZipFile(str(bundle), "r") as zfile:
_files = rqtl2.extract(zfile, _outdir)
- logger.info("Saving basic phenotype data.")
+ logger.info("Saving new phenotypes.")
_phenos = save_phenotypes(conn, _control_data, _outdir)
def __build_phenos_maps__(accumulator, current):
dataid, row = current
@@ -220,6 +219,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
for row in samples_by_species_and_population(
conn, _species["SpeciesId"], _population["Id"])}
# b. Save all the data items (DataIds are vibes), return new IDs
+ logger.info("Saving new phenotypes data.")
data = save_pheno_data(conn=conn,
dataidmap=dataidmap,
pheno_name2id=pheno_name2id,
@@ -227,6 +227,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
control_data=_control_data,
filesdir=_outdir)
# 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
+ logger.info("Cross-referencing new phenotypes to their data and publications.")
xrefs = cross_reference_phenotypes_publications_and_data(
conn, tuple(dataidmap.values()))
# 5. If standard errors and N exist, save them too
@@ -247,6 +248,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
filesdir=_outdir)
# 6. If entirely new data, update authorisations (break this down)
update_auth(_user, _species, _population, _dataset, _phenos)
+ logger.info("Updating authorisation.")
return 0
diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py
index eb926d8..f4d3529 100644
--- a/uploader/phenotypes/models.py
+++ b/uploader/phenotypes/models.py
@@ -336,12 +336,14 @@ def save_phenotypes_data(
}[table]
saved_data = tuple()
with conn.cursor(cursorclass=DictCursor) as cursor:
+ _count = 0
while True:
                batch = take(data, 5000)
if len(batch) == 0:
logger.warning("Got an empty batch. This needs investigation.")
break
+ logger.debug("Saving batch of %s items.", len(batch))
cursor.executemany(
(f"INSERT INTO {_table_details['table']}"
f"({_table_details['DataIdCol']}, StrainId, {_table_details['valueCol']}) "
@@ -359,5 +361,8 @@ def save_phenotypes_data(
for item in batch)
for single in items))
saved_data = saved_data + tuple(dict(row) for row in cursor.fetchall())
+ _count = _count + len(batch)
+
+ logger.debug("Saved a total of %s data rows", _count)
return saved_data