aboutsummaryrefslogtreecommitdiff
path: root/gn3/db/traits.py
diff options
context:
space:
mode:
authorBonfaceKilz2022-02-25 12:46:54 +0300
committerBonfaceKilz2022-03-12 15:33:09 +0300
commit66595875c7f4e7cbadf5210bb66d90043ef8ffa1 (patch)
treeb07bb1c0b221ee4e20c1a7d8fa1f13b50ecaf22d /gn3/db/traits.py
parent8a7335cdb3fe9e906cefa719e336a7266a9b253b (diff)
downloadgenenetwork3-66595875c7f4e7cbadf5210bb66d90043ef8ffa1.tar.gz
Move operations on sample_data to it's own module
Diffstat (limited to 'gn3/db/traits.py')
-rw-r--r--gn3/db/traits.py283
1 files changed, 0 insertions, 283 deletions
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 04b2a70..2e634fe 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -79,289 +79,6 @@ def export_trait_data(
return reduce(__exporter, samplelist, tuple())
-def get_trait_csv_sample_data(conn: Any,
- trait_name: int, phenotype_id: int) -> str:
- """Fetch a trait and return it as a csv string"""
- __query = ("SELECT concat(st.Name, ',', ifnull(pd.value, 'x'), ',', "
- "ifnull(ps.error, 'x'), ',', ifnull(ns.count, 'x')) as 'Data' "
- ",ifnull(ca.Name, 'x') as 'CaseAttr', "
- "ifnull(cxref.value, 'x') as 'Value' "
- "FROM PublishFreeze pf "
- "JOIN PublishXRef px ON px.InbredSetId = pf.InbredSetId "
- "JOIN PublishData pd ON pd.Id = px.DataId "
- "JOIN Strain st ON pd.StrainId = st.Id "
- "LEFT JOIN PublishSE ps ON ps.DataId = pd.Id "
- "AND ps.StrainId = pd.StrainId "
- "LEFT JOIN NStrain ns ON ns.DataId = pd.Id "
- "AND ns.StrainId = pd.StrainId "
- "LEFT JOIN CaseAttributeXRefNew cxref ON "
- "(cxref.InbredSetId = px.InbredSetId AND "
- "cxref.StrainId = st.Id) "
- "LEFT JOIN CaseAttribute ca ON ca.Id = cxref.CaseAttributeId "
- "WHERE px.Id = %s AND px.PhenotypeId = %s ORDER BY st.Name")
- case_attr_columns = set()
- csv_data = {}
- with conn.cursor() as cursor:
- cursor.execute(__query, (trait_name, phenotype_id))
- for data in cursor.fetchall():
- if data[1] == "x":
- csv_data[data[0]] = None
- else:
- sample, case_attr, value = data[0], data[1], data[2]
- if not csv_data.get(sample):
- csv_data[sample] = {}
- csv_data[sample][case_attr] = None if value == "x" else value
- case_attr_columns.add(case_attr)
- if not case_attr_columns:
- return ("Strain Name,Value,SE,Count\n" +
- "\n".join(csv_data.keys()))
- else:
- columns = sorted(case_attr_columns)
- csv = ("Strain Name,Value,SE,Count," +
- ",".join(columns) + "\n")
- for key, value in csv_data.items():
- if not value:
- csv += (key + (len(case_attr_columns) * ",x") + "\n")
- else:
- vals = [str(value.get(column, "x")) for column in columns]
- csv += (key + "," + ",".join(vals) + "\n")
- return csv
- return "No Sample Data Found"
-
-
-def update_sample_data(conn: Any, # pylint: disable=[R0913]
- trait_name: str,
- strain_name: str,
- phenotype_id: int,
- value: Union[int, float, str],
- error: Union[int, float, str],
- count: Union[int, str]):
- """Given the right parameters, update sample-data from the relevant
- table."""
- strain_id, data_id = "", ""
-
- with conn.cursor() as cursor:
- cursor.execute(
- ("SELECT Strain.Id, PublishData.Id FROM "
- "(PublishData, Strain, PublishXRef, PublishFreeze) "
- "LEFT JOIN PublishSE ON "
- "(PublishSE.DataId = PublishData.Id AND "
- "PublishSE.StrainId = PublishData.StrainId) "
- "LEFT JOIN NStrain ON "
- "(NStrain.DataId = PublishData.Id AND "
- "NStrain.StrainId = PublishData.StrainId) "
- "WHERE PublishXRef.InbredSetId = "
- "PublishFreeze.InbredSetId AND "
- "PublishData.Id = PublishXRef.DataId AND "
- "PublishXRef.Id = %s AND "
- "PublishXRef.PhenotypeId = %s "
- "AND PublishData.StrainId = Strain.Id "
- "AND Strain.Name = %s"),
- (trait_name, phenotype_id, str(strain_name)))
- strain_id, data_id = cursor.fetchone()
- updated_published_data: int = 0
- updated_se_data: int = 0
- updated_n_strains: int = 0
-
- with conn.cursor() as cursor:
- # Update the PublishData table
- if value == "x":
- cursor.execute(("DELETE FROM PublishData "
- "WHERE StrainId = %s AND Id = %s"),
- (strain_id, data_id))
- updated_published_data = cursor.rowcount
- else:
- cursor.execute(("UPDATE PublishData SET value = %s "
- "WHERE StrainId = %s AND Id = %s"),
- (value, strain_id, data_id))
- updated_published_data = cursor.rowcount
-
- if not updated_published_data:
- cursor.execute(
- ("SELECT * FROM "
- "PublishData WHERE StrainId = "
- "%s AND Id = %s"),
- (strain_id, data_id))
- if not cursor.fetchone():
- cursor.execute(("INSERT INTO PublishData (Id, StrainId, "
- " value) VALUES (%s, %s, %s)"),
- (data_id, strain_id, value))
- updated_published_data = cursor.rowcount
-
- # Update the PublishSE table
- if error == "x":
- cursor.execute(("DELETE FROM PublishSE "
- "WHERE StrainId = %s AND DataId = %s"),
- (strain_id, data_id))
- updated_se_data = cursor.rowcount
- else:
- cursor.execute(("UPDATE PublishSE SET error = %s "
- "WHERE StrainId = %s AND DataId = %s"),
- (None if error == "x" else error,
- strain_id, data_id))
- updated_se_data = cursor.rowcount
- if not updated_se_data:
- cursor.execute(
- ("SELECT * FROM "
- "PublishSE WHERE StrainId = "
- "%s AND DataId = %s"),
- (strain_id, data_id))
- if not cursor.fetchone():
- cursor.execute(
- ("INSERT INTO PublishSE (StrainId, DataId, "
- " error) VALUES (%s, %s, %s)"),
- (strain_id, data_id, None if error == "x" else error))
- updated_se_data = cursor.rowcount
-
- # Update the NStrain table
- if count == "x":
- cursor.execute(("DELETE FROM NStrain "
- "WHERE StrainId = %s AND DataId = %s"),
- (strain_id, data_id))
- updated_n_strains = cursor.rowcount
- else:
- cursor.execute(("UPDATE NStrain SET count = %s "
- "WHERE StrainId = %s AND DataId = %s"),
- (count, strain_id, data_id))
- updated_n_strains = cursor.rowcount
- if not updated_n_strains:
- cursor.execute(
- ("SELECT * FROM "
- "NStrain WHERE StrainId = "
- "%s AND DataId = %s"),
- (strain_id, data_id))
- if not cursor.fetchone():
- cursor.execute(("INSERT INTO NStrain "
- "(StrainId, DataId, count) "
- "VALUES (%s, %s, %s)"),
- (strain_id, data_id, count))
- updated_n_strains = cursor.rowcount
- return (updated_published_data,
- updated_se_data, updated_n_strains)
-
-
-def delete_sample_data(conn: Any,
- trait_name: str,
- strain_name: str,
- phenotype_id: int):
- """Given the right parameters, delete sample-data from the relevant
- table."""
- strain_id, data_id = "", ""
-
- deleted_published_data: int = 0
- deleted_se_data: int = 0
- deleted_n_strains: int = 0
-
- with conn.cursor() as cursor:
- # Delete the PublishData table
- try:
- cursor.execute(
- ("SELECT Strain.Id, PublishData.Id FROM "
- "(PublishData, Strain, PublishXRef, PublishFreeze) "
- "LEFT JOIN PublishSE ON "
- "(PublishSE.DataId = PublishData.Id AND "
- "PublishSE.StrainId = PublishData.StrainId) "
- "LEFT JOIN NStrain ON "
- "(NStrain.DataId = PublishData.Id AND "
- "NStrain.StrainId = PublishData.StrainId) "
- "WHERE PublishXRef.InbredSetId = "
- "PublishFreeze.InbredSetId AND "
- "PublishData.Id = PublishXRef.DataId AND "
- "PublishXRef.Id = %s AND "
- "PublishXRef.PhenotypeId = %s "
- "AND PublishData.StrainId = Strain.Id "
- "AND Strain.Name = %s"),
- (trait_name, phenotype_id, str(strain_name)))
-
- # Check if it exists if the data was already deleted:
- if _result := cursor.fetchone():
- strain_id, data_id = _result
-
- # Only run if the strain_id and data_id exist
- if strain_id and data_id:
- cursor.execute(("DELETE FROM PublishData "
- "WHERE StrainId = %s AND Id = %s"),
- (strain_id, data_id))
- deleted_published_data = cursor.rowcount
-
- # Delete the PublishSE table
- cursor.execute(("DELETE FROM PublishSE "
- "WHERE StrainId = %s AND DataId = %s"),
- (strain_id, data_id))
- deleted_se_data = cursor.rowcount
-
- # Delete the NStrain table
- cursor.execute(("DELETE FROM NStrain "
- "WHERE StrainId = %s AND DataId = %s"),
- (strain_id, data_id))
- deleted_n_strains = cursor.rowcount
- except Exception as e: #pylint: disable=[C0103, W0612]
- conn.rollback()
- raise MySQLdb.Error
- conn.commit()
- cursor.close()
- cursor.close()
-
- return (deleted_published_data,
- deleted_se_data, deleted_n_strains)
-
-
-def insert_sample_data(conn: Any, # pylint: disable=[R0913]
- trait_name: str,
- strain_name: str,
- phenotype_id: int,
- value: Union[int, float, str],
- error: Union[int, float, str],
- count: Union[int, str]):
- """Given the right parameters, insert sample-data to the relevant table.
-
- """
-
- inserted_published_data, inserted_se_data, inserted_n_strains = 0, 0, 0
- with conn.cursor() as cursor:
- try:
- cursor.execute("SELECT DataId FROM PublishXRef WHERE Id = %s AND "
- "PhenotypeId = %s", (trait_name, phenotype_id))
- data_id = cursor.fetchone()
-
- cursor.execute("SELECT Id FROM Strain WHERE Name = %s",
- (strain_name,))
- strain_id = cursor.fetchone()
-
- # Return early if an insert already exists!
- cursor.execute("SELECT Id FROM PublishData where Id = %s "
- "AND StrainId = %s",
- (data_id, strain_id))
- if cursor.fetchone(): # This strain already exists
- return (0, 0, 0)
-
- # Insert the PublishData table
- cursor.execute(("INSERT INTO PublishData (Id, StrainId, value)"
- "VALUES (%s, %s, %s)"),
- (data_id, strain_id, value))
- inserted_published_data = cursor.rowcount
-
- # Insert into the PublishSE table if error is specified
- if error and error != "x":
- cursor.execute(("INSERT INTO PublishSE (StrainId, DataId, "
- " error) VALUES (%s, %s, %s)"),
- (strain_id, data_id, error))
- inserted_se_data = cursor.rowcount
-
- # Insert into the NStrain table
- if count and count != "x":
- cursor.execute(("INSERT INTO NStrain "
- "(StrainId, DataId, count) "
- "VALUES (%s, %s, %s)"),
- (strain_id, data_id, count))
- inserted_n_strains = cursor.rowcount
- except Exception: # pylint: disable=[C0103, W0612]
- conn.rollback()
- raise MySQLdb.Error
- return (inserted_published_data,
- inserted_se_data, inserted_n_strains)
-
-
def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any):
"""Retrieve trait information for type `Publish` traits.