diff options
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 36 |
-rw-r--r-- | uploader/phenotypes/models.py | 18 |
2 files changed, 21 insertions, 33 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index fdfab4c..d3d9f5a 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -242,12 +242,12 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int: conn, _species["SpeciesId"], _population["Id"])} # b. Save all the data items (DataIds are vibes), return new IDs logger.info("Saving new phenotypes data.") - data = save_pheno_data(conn=conn, - dataidmap=dataidmap, - pheno_name2id=pheno_name2id, - samples=samples, - control_data=_control_data, - filesdir=_outdir) + _num_data_rows = save_pheno_data(conn=conn, + dataidmap=dataidmap, + pheno_name2id=pheno_name2id, + samples=samples, + control_data=_control_data, + filesdir=_outdir) # 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef logger.info("Cross-referencing new phenotypes to their data and publications.") xrefs = cross_reference_phenotypes_publications_and_data( @@ -255,19 +255,19 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int: # 5. If standard errors and N exist, save them too # (use IDs returned in `3. b.` above). logger.info("Saving new phenotypes standard errors.") - _data_se = save_phenotypes_se(conn=conn, - dataidmap=dataidmap, - pheno_name2id=pheno_name2id, - samples=samples, - control_data=_control_data, - filesdir=_outdir) + _num_se_rows = save_phenotypes_se(conn=conn, + dataidmap=dataidmap, + pheno_name2id=pheno_name2id, + samples=samples, + control_data=_control_data, + filesdir=_outdir) logger.info("Saving new phenotypes sample counts.") - _data_n = save_phenotypes_n(conn=conn, - dataidmap=dataidmap, - pheno_name2id=pheno_name2id, - samples=samples, - control_data=_control_data, - filesdir=_outdir) + _num_n_rows = save_phenotypes_n(conn=conn, + dataidmap=dataidmap, + pheno_name2id=pheno_name2id, + samples=samples, + control_data=_control_data, + filesdir=_outdir) # 6. 
If entirely new data, update authorisations (break this down) logger.info("Updating authorisation.") update_auth(_token, _species, _population, _dataset, _phenos) diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py index f4d3529..7861427 100644 --- a/uploader/phenotypes/models.py +++ b/uploader/phenotypes/models.py @@ -324,7 +324,7 @@ def save_phenotypes_data( conn: mdb.Connection, table: str, data: Iterable[dict] -) -> tuple[dict, ...]: +) -> int: """Save new phenotypes data into the database.""" _table_details = { "PublishData": { @@ -334,7 +334,6 @@ def save_phenotypes_data( "NStrain": { "table": "PublishData", "valueCol": "count", "DataIdCol": "DataId"} }[table] - saved_data = tuple() with conn.cursor(cursorclass=DictCursor) as cursor: _count = 0 while True: @@ -348,21 +347,10 @@ def save_phenotypes_data( (f"INSERT INTO {_table_details['table']}" f"({_table_details['DataIdCol']}, StrainId, {_table_details['valueCol']}) " "VALUES " - f"(%(data_id)s, %(sample_id)s, %({_table_details['valueCol']})s) " - "RETURNING *"), + f"(%(data_id)s, %(sample_id)s, %({_table_details['valueCol']})s) "), tuple(batch)) - - paramstr = ", ".join(["(%s, %s)"] * len(batch)) - cursor.execute(f"SELECT * FROM {_table_details['table']} " - f"WHERE ({_table_details['DataIdCol']}, StrainId) " - f"IN ({paramstr})", - tuple(single for items in - ((item["data_id"], item["sample_id"]) - for item in batch) - for single in items)) - saved_data = saved_data + tuple(dict(row) for row in cursor.fetchall()) _count = _count + len(batch) logger.debug("Saved a total of %s data rows", _count) - return saved_data + return _count |