diff options
author | Frederick Muriuki Muriithi | 2025-06-02 12:25:58 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-06-02 12:32:09 -0500 |
commit | f0b0e04bb6aa1744c802d4eb5cd1cb7c84c88b02 (patch) | |
tree | 3e1d10c3e053ba34248cf07702875de85b8d8d6d | |
parent | e3e097af4b92173e2bcb4771ccd1360fbe0e8cf6 (diff) | |
download | gn-uploader-f0b0e04bb6aa1744c802d4eb5cd1cb7c84c88b02.tar.gz |
Add some debugging statements.
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 6 | ||||
-rw-r--r-- | uploader/phenotypes/models.py | 5 |
2 files changed, 9 insertions, 2 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index adb076f..dbfafc7 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -36,7 +36,6 @@ def save_phenotypes(
         filesdir: Path
 ) -> tuple[dict, ...]:
     """Read `phenofiles` and save the phenotypes therein."""
-    logger.info("Saving new phenotypes.")
     ## TODO: Replace with something like this: ##
     # phenofiles = control_data["phenocovar"] + control_data.get(
     #     "gn-metadata", {}).get("pheno", [])
@@ -194,7 +193,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
     _outdir = Path(bundle.parent, f"bundle_{bundle.stem}")
     with ZipFile(str(bundle), "r") as zfile:
         _files = rqtl2.extract(zfile, _outdir)
-    logger.info("Saving basic phenotype data.")
+    logger.info("Saving new phenotypes.")
     _phenos = save_phenotypes(conn, _control_data, _outdir)
     def __build_phenos_maps__(accumulator, current):
         dataid, row = current
@@ -220,6 +219,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
               for row in samples_by_species_and_population(
                   conn, _species["SpeciesId"], _population["Id"])}
     # b. Save all the data items (DataIds are vibes), return new IDs
+    logger.info("Saving new phenotypes data.")
     data = save_pheno_data(conn=conn,
                            dataidmap=dataidmap,
                            pheno_name2id=pheno_name2id,
@@ -227,6 +227,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
                            control_data=_control_data,
                            filesdir=_outdir)
     # 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
+    logger.info("Cross-referencing new phenotypes to their data and publications.")
     xrefs = cross_reference_phenotypes_publications_and_data(
         conn, tuple(dataidmap.values()))
     # 5. If standard errors and N exist, save them too
@@ -247,6 +248,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
                          filesdir=_outdir)
     # 6. If entirely new data, update authorisations (break this down)
     update_auth(_user, _species, _population, _dataset, _phenos)
+    logger.info("Updating authorisation.")
     return 0
diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py
index eb926d8..f4d3529 100644
--- a/uploader/phenotypes/models.py
+++ b/uploader/phenotypes/models.py
@@ -336,12 +336,14 @@ def save_phenotypes_data(
     }[table]
     saved_data = tuple()
     with conn.cursor(cursorclass=DictCursor) as cursor:
+        _count = 0
         while True:
             batch = take(data, 5000)
             if len(batch) == 0:
                 logger.warning("Got an empty batch. This needs investigation.")
                 break
+            logger.debug("Saving batch of %s items.", len(batch))
             cursor.executemany(
                 (f"INSERT INTO {_table_details['table']}"
                  f"({_table_details['DataIdCol']}, StrainId, {_table_details['valueCol']}) "
@@ -359,5 +361,8 @@ def save_phenotypes_data(
                     for item in batch)
                 for single in items))
             saved_data = saved_data + tuple(dict(row) for row in cursor.fetchall())
+            _count = _count + len(batch)
+
+    logger.debug("Saved a total of %s data rows", _count)
     return saved_data