diff options
author | Frederick Muriuki Muriithi | 2025-01-27 16:00:39 -0600 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-01-27 16:04:00 -0600 |
commit | 75d20d9ddd88e32b149892f5686808dbb5e53ee3 (patch) | |
tree | e6b02da7329badfee2b4f095eaa70fce2975c79f | |
parent | b2a5013e192c2116442253bf274f80fa2ae8dd58 (diff) | |
download | gn-uploader-75d20d9ddd88e32b149892f5686808dbb5e53ee3.tar.gz |
Fix bug in how the SE and N values for a phenotype are fetched.
-rw-r--r-- | uploader/phenotypes/models.py | 66 |
1 files changed, 37 insertions, 29 deletions
diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py index 9b07000..c9afc22 100644 --- a/uploader/phenotypes/models.py +++ b/uploader/phenotypes/models.py @@ -87,31 +87,41 @@ def dataset_phenotypes(conn: mdb.Connection, return tuple(dict(row) for row in cursor.fetchall()) -def __phenotype_se__(cursor: Cursor, - species_id: int, - population_id: int, - dataset_id: int, - xref_id: str) -> dict: +def __phenotype_se__(cursor: Cursor, xref_id, dataids_and_strainids): """Fetch standard-error values (if they exist) for a phenotype.""" - _sequery = ( - "SELECT pxr.Id AS xref_id, pxr.DataId, str.Id AS StrainId, pse.error, nst.count " - "FROM Phenotype AS pheno " - "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " - "INNER JOIN PublishSE AS pse ON pxr.DataId=pse.DataId " - "INNER JOIN NStrain AS nst ON pse.DataId=nst.DataId " - "INNER JOIN Strain AS str ON nst.StrainId=str.Id " - "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId " - "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId " - "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId " - "WHERE (str.SpeciesId, pxr.InbredSetId, pf.Id, pxr.Id)=(%s, %s, %s, %s)") - cursor.execute(_sequery, - (species_id, population_id, dataset_id, xref_id)) - return {(row["DataId"], row["StrainId"]): { - "xref_id": row["xref_id"], - "DataId": row["DataId"], - "error": row["error"], - "count": row["count"] - } for row in cursor.fetchall()} + paramstr = ", ".join(["(%s, %s)"] * len(dataids_and_strainids)) + flat = tuple(item for sublist in dataids_and_strainids for item in sublist) + cursor.execute("SELECT * FROM PublishSE WHERE (DataId, StrainId) IN " + f"({paramstr})", + flat) + debug_query(cursor, app.logger) + _se = { + (row["DataId"], row["StrainId"]): { + "DataId": row["DataId"], + "StrainId": row["StrainId"], + "error": row["error"] + } + for row in cursor.fetchall() + } + + cursor.execute("SELECT * FROM NStrain WHERE (DataId, StrainId) IN " + f"({paramstr})", + flat) + debug_query(cursor, app.logger) + _n = { + (row["DataId"], row["StrainId"]): { + "DataId": row["DataId"], + "StrainId": row["StrainId"], + "count": row["count"] + } + for row in cursor.fetchall() + } + + keys = set(tuple(_se.keys()) + tuple(_n.keys())) + return { + key: {"xref_id": xref_id, **_se.get(key,{}), **_n.get(key,{})} + for key in keys + } def __organise_by_phenotype__(pheno, row): """Organise disparate data rows into phenotype 'objects'.""" @@ -185,11 +195,9 @@ def phenotype_by_id( **_pheno, "data": tuple(__merge_pheno_data_and_se__( _pheno["data"], - __phenotype_se__(cursor, - species_id, - population_id, - dataset_id, - xref_id)).values()) + __phenotype_se__( + cursor, xref_id, tuple(_pheno["data"].keys())) + ).values()) } if bool(_pheno) and len(_pheno.keys()) > 1: raise Exception( |