diff options
author | zsloan | 2023-08-02 18:37:25 +0000 |
---|---|---|
committer | zsloan | 2023-08-17 14:54:42 -0500 |
commit | 614de6ac83e7d89eebbee056046303e6cb39e9c5 (patch) | |
tree | 5a39fb0d190cf04e99ce07b5b5e5a83c4d1aba85 /gn3 | |
parent | 0d7412cff90b77b0dd0fd7b1ba036e76c146a0d4 (diff) | |
download | genenetwork3-614de6ac83e7d89eebbee056046303e6cb39e9c5.tar.gz |
Make several sample_data fetching functions specific to phenotype or mrna assay resources, and change their naming accordingly
Diffstat (limited to 'gn3')
-rw-r--r-- | gn3/db/sample_data.py | 95 |
1 files changed, 90 insertions, 5 deletions
diff --git a/gn3/db/sample_data.py b/gn3/db/sample_data.py index 92032d6..50a7561 100644 --- a/gn3/db/sample_data.py +++ b/gn3/db/sample_data.py @@ -57,10 +57,35 @@ def __extract_actions( ) return result -def get_trait_sample_data( +def get_mrna_sample_data( + conn: Any, trait_name: str +) -> Dict: + """Fetch a mRNA Assay (ProbeSet in the DB) trait's sample data and return it as a dict""" + with conn.cursor() as cursor: + cursor.execute(""" +SELECT st.Name, ifnull(pd.value, 'x'), ifnull(ps.error, 'x'), ifnull(ns.count, 'x') +FROM ProbeSetFreeze pf JOIN ProbeSetXRef px ON px.InbredSetId = pf.InbredSetId + JOIN ProbeSetData pd ON pd.Id = px.DataId JOIN Strain st ON pd.StrainId = st.Id + LEFT JOIN ProbeSetSE ps ON ps.DataId = pd.Id AND ps.StrainId = pd.StrainId + LEFT JOIN NStrain ns ON ns.DataId = pd.Id AND ns.StrainId = pd.StrainId +WHERE px.Id = %s +ORDER BY st.Name""", (trait_name)) + + sample_data = {} + for data in cursor.fetchall(): + sample, value, error, n_cases = data + sample_data[sample] = { + 'value': value, + 'error': error, + 'n_cases': n_cases + } + + return sample_data + +def get_pheno_sample_data( conn: Any, trait_name: int, phenotype_id: int ) -> Dict: - """Fetch a trait's sample data and return it as a dict""" + """Fetch a phenotype (Publish in the DB) trait's sample data and return it as a dict""" with conn.cursor() as cursor: cursor.execute(""" SELECT st.Name, ifnull(pd.value, 'x'), ifnull(ps.error, 'x'), ifnull(ns.count, 'x') @@ -82,10 +107,39 @@ ORDER BY st.Name""", (trait_name, phenotype_id)) return sample_data -def get_trait_csv_sample_data( +def get_mrna_csv_sample_data( + conn: Any, trait_name: int, sample_list: list +) -> str: + """Fetch a mRNA Assay (ProbeSet in DB) trait and return it as a csv string""" + with conn.cursor() as cursor: + cursor.execute(""" +SELECT DISTINCT st.Name, concat(st.Name, ',', ifnull(pd.value, 'x'), ',', +ifnull(ps.error, 'x'), ',', ifnull(ns.count, 'x')) AS 'Data' +FROM ProbeSetFreeze pf JOIN ProbeSetXRef px ON px.InbredSetId = pf.InbredSetId +JOIN ProbeSetData pd ON pd.Id = px.DataId JOIN Strain st ON pd.StrainId = st.Id +LEFT JOIN ProbeSetSE ps ON ps.DataId = pd.Id AND ps.StrainId = pd.StrainId +LEFT JOIN NStrain ns ON ns.DataId = pd.Id AND ns.StrainId = pd.StrainId +WHERE px.Id = %s ORDER BY st.Name""", + (trait_name)) + if not (data := cursor.fetchall()): + return "No Sample Data Found" + + # Get list of samples with data in the DB + existing_samples = [el[0] for el in data] + + trait_csv = ["Strain Name,Value,SE,Count"] + for sample in sample_list: + if sample in existing_samples: + trait_csv.append(data[existing_samples.index(sample)][1]) + else: + trait_csv.append(sample + ",x,x,x") + + return "\n".join(trait_csv) + +def get_pheno_csv_sample_data( conn: Any, trait_name: int, phenotype_id: int, sample_list: list ) -> str: - """Fetch a trait and return it as a csv string""" + """Fetch a phenotype (Publish in DB) trait and return it as a csv string""" with conn.cursor() as cursor: cursor.execute(""" SELECT DISTINCT st.Name, concat(st.Name, ',', ifnull(pd.value, 'x'), ',', @@ -111,8 +165,39 @@ WHERE px.Id = %s AND px.PhenotypeId = %s ORDER BY st.Name""", return "\n".join(trait_csv) +def get_mrna_sample_data_ids( + conn: Any, probsetxref_id: int, strain_name: str +) -> Tuple: + """Get the strain_id, probesetdata_id and inbredset_id for a given strain""" + strain_id, probesetdata_id, inbredset_id = None, None, None + with conn.cursor() as cursor: + cursor.execute( + "SELECT st.id, pd.Id, pf.InbredSetId " + "FROM ProbeSetData pd " + "JOIN Strain st ON pd.StrainId = st.Id " + "JOIN ProbeSetXRef px ON px.DataId = pd.Id " + "JOIN ProbeSetFreeze pf ON pf.InbredSetId " + "= px.InbredSetId WHERE px.Id = %s " + "AND px.PhenotypeId = %s AND st.Name = %s", + (probsetxref_id, strain_name), + ) + if _result := cursor.fetchone(): + strain_id, probesetdata_id, inbredset_id = _result + if not all([strain_id, probesetdata_id, inbredset_id]): + # Applies for data to be inserted: + cursor.execute( + "SELECT DataId, InbredSetId FROM ProbeSetXRef " + "WHERE Id = %s", + (probesetxref_id), + ) + probsetdata_id, inbredset_id = cursor.fetchone() + cursor.execute( + "SELECT Id FROM Strain WHERE Name = %s", (strain_name,) + ) + strain_id = cursor.fetchone()[0] + return (strain_id, probesetdata_id, inbredset_id) -def get_sample_data_ids( +def get_pheno_sample_data_ids( conn: Any, publishxref_id: int, phenotype_id: int, strain_name: str ) -> Tuple: """Get the strain_id, publishdata_id and inbredset_id for a given strain""" |