about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--scripts/load_phenotypes_to_db.py36
-rw-r--r--uploader/phenotypes/models.py18
2 files changed, 21 insertions(+), 33 deletions(-)
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index fdfab4c..d3d9f5a 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -242,12 +242,12 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
conn, _species["SpeciesId"], _population["Id"])}
# b. Save all the data items (DataIds are vibes), return new IDs
logger.info("Saving new phenotypes data.")
- data = save_pheno_data(conn=conn,
- dataidmap=dataidmap,
- pheno_name2id=pheno_name2id,
- samples=samples,
- control_data=_control_data,
- filesdir=_outdir)
+ _num_data_rows = save_pheno_data(conn=conn,
+ dataidmap=dataidmap,
+ pheno_name2id=pheno_name2id,
+ samples=samples,
+ control_data=_control_data,
+ filesdir=_outdir)
# 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
logger.info("Cross-referencing new phenotypes to their data and publications.")
xrefs = cross_reference_phenotypes_publications_and_data(
@@ -255,19 +255,19 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
# 5. If standard errors and N exist, save them too
# (use IDs returned in `3. b.` above).
logger.info("Saving new phenotypes standard errors.")
- _data_se = save_phenotypes_se(conn=conn,
- dataidmap=dataidmap,
- pheno_name2id=pheno_name2id,
- samples=samples,
- control_data=_control_data,
- filesdir=_outdir)
+ _num_se_rows = save_phenotypes_se(conn=conn,
+ dataidmap=dataidmap,
+ pheno_name2id=pheno_name2id,
+ samples=samples,
+ control_data=_control_data,
+ filesdir=_outdir)
logger.info("Saving new phenotypes sample counts.")
- _data_n = save_phenotypes_n(conn=conn,
- dataidmap=dataidmap,
- pheno_name2id=pheno_name2id,
- samples=samples,
- control_data=_control_data,
- filesdir=_outdir)
+ _num_n_rows = save_phenotypes_n(conn=conn,
+ dataidmap=dataidmap,
+ pheno_name2id=pheno_name2id,
+ samples=samples,
+ control_data=_control_data,
+ filesdir=_outdir)
# 6. If entirely new data, update authorisations (break this down)
logger.info("Updating authorisation.")
update_auth(_token, _species, _population, _dataset, _phenos)
diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py
index f4d3529..7861427 100644
--- a/uploader/phenotypes/models.py
+++ b/uploader/phenotypes/models.py
@@ -324,7 +324,7 @@ def save_phenotypes_data(
conn: mdb.Connection,
table: str,
data: Iterable[dict]
-) -> tuple[dict, ...]:
+) -> int:
"""Save new phenotypes data into the database."""
_table_details = {
"PublishData": {
@@ -334,7 +334,6 @@ def save_phenotypes_data(
"NStrain": {
"table": "PublishData", "valueCol": "count", "DataIdCol": "DataId"}
}[table]
- saved_data = tuple()
with conn.cursor(cursorclass=DictCursor) as cursor:
_count = 0
while True:
@@ -348,21 +347,10 @@ def save_phenotypes_data(
(f"INSERT INTO {_table_details['table']}"
f"({_table_details['DataIdCol']}, StrainId, {_table_details['valueCol']}) "
"VALUES "
- f"(%(data_id)s, %(sample_id)s, %({_table_details['valueCol']})s) "
- "RETURNING *"),
+ f"(%(data_id)s, %(sample_id)s, %({_table_details['valueCol']})s) "),
tuple(batch))
-
- paramstr = ", ".join(["(%s, %s)"] * len(batch))
- cursor.execute(f"SELECT * FROM {_table_details['table']} "
- f"WHERE ({_table_details['DataIdCol']}, StrainId) "
- f"IN ({paramstr})",
- tuple(single for items in
- ((item["data_id"], item["sample_id"])
- for item in batch)
- for single in items))
- saved_data = saved_data + tuple(dict(row) for row in cursor.fetchall())
_count = _count + len(batch)
logger.debug("Saved a total of %s data rows", _count)
- return saved_data
+ return _count