diff options
author | BonfaceKilz | 2021-07-29 22:15:36 +0300 |
---|---|---|
committer | GitHub | 2021-07-29 22:15:36 +0300 |
commit | 00278bc237c90b4ac276171a32bbe57e056644b6 (patch) | |
tree | f6ff2fd64cd288d75af4900cb7b2cd984f598ce0 /gn3/db/traits.py | |
parent | 2ccbf1844afe352e23af7ff958ec2b0694cd87ea (diff) | |
parent | 8d83ba7298c33e54ab204effe2a4241ce848add9 (diff) | |
download | genenetwork3-00278bc237c90b4ac276171a32bbe57e056644b6.tar.gz |
Merge pull request #30 from genenetwork/Feature/Update-db-from-csv-data
Feature/update db from csv data
Diffstat (limited to 'gn3/db/traits.py')
-rw-r--r-- | gn3/db/traits.py | 165 |
1 files changed, 76 insertions, 89 deletions
diff --git a/gn3/db/traits.py b/gn3/db/traits.py index f18e16a..ae1939a 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -1,95 +1,81 @@ -"""This contains all the necessary functions that are required to add traits -to the published database""" -from dataclasses import dataclass -from typing import Any, Dict, Optional - - -@dataclass(frozen=True) -class Riset: - """Class for keeping track of riset. A riset is a group e.g. rat HSNIH-Palmer, -BXD - - """ - name: Optional[str] - r_id: Optional[int] - - -@dataclass(frozen=True) -class WebqtlCaseData: - """Class for keeping track of one case data in one trait""" - value: Optional[float] = None - variance: Optional[float] = None - count: Optional[int] = None # Number of Individuals - - def __str__(self): - _str = "" - if self.value: - _str += f"value={self.value:.3f}" - if self.variance: - _str += f" variance={self.variance:.3f}" - if self.count: - _str += " n_data={self.count}" - return _str - - -def lookup_webqtldataset_name(riset_name: str, conn: Any): - """Given a group name(riset), return it's name e.g. BXDPublish, -HLCPublish.""" +"""This class contains functions relating to trait data manipulation""" +from typing import Any, Dict, Union + + +def get_trait_csv_sample_data(conn: Any, + trait_name: int, phenotype_id: int): + """Fetch a trait and return it as a csv string""" + sql = ("SELECT Strain.Id, PublishData.Id, Strain.Name, " + "PublishData.value, " + "PublishSE.error, NStrain.count FROM " + "(PublishData, Strain, PublishXRef, PublishFreeze) " + "LEFT JOIN PublishSE ON " + "(PublishSE.DataId = PublishData.Id AND " + "PublishSE.StrainId = PublishData.StrainId) " + "LEFT JOIN NStrain ON (NStrain.DataId = PublishData.Id AND " + "NStrain.StrainId = PublishData.StrainId) WHERE " + "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " + "PublishData.Id = PublishXRef.DataId AND " + "PublishXRef.Id = %s AND PublishXRef.PhenotypeId = %s " + "AND PublishData.StrainId = Strain.Id Order BY Strain.Name") + csv_data = ["Strain Id,Strain Name,Value,SE,Count"] + publishdata_id = "" with conn.cursor() as cursor: - cursor.execute( - "SELECT PublishFreeze.Name FROM " - "PublishFreeze, InbredSet WHERE " - "PublishFreeze.InbredSetId = InbredSet.Id " - "AND InbredSet.Name = '%s'" % riset_name) - _result, *_ = cursor.fetchone() - return _result + cursor.execute(sql, (trait_name, phenotype_id,)) + for record in cursor.fetchall(): + (strain_id, publishdata_id, + strain_name, value, error, count) = record + csv_data.append( + ",".join([str(val) if val else "x" + for val in (strain_id, strain_name, + value, error, count)])) + return f"# Publish Data Id: {publishdata_id}\n\n" + "\n".join(csv_data) + + +def update_sample_data(conn: Any, + strain_name: str, + strain_id: int, + publish_data_id: int, + value: Union[int, float, str], + error: Union[int, float, str], + count: Union[int, str]): + """Given the right parameters, update sample-data from the relevant + table.""" + STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s" + PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s " + "WHERE StrainId = %s AND Id = %s") + PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s " + "WHERE StrainId = %s AND DataId = %s") + N_STRAIN_SQL: str = ("UPDATE NStrain SET count = %s " + "WHERE StrainId = %s AND DataId = %s") + + updated_strains: int = 0 + updated_published_data: int = 0 + updated_se_data: int = 0 + updated_n_strains: int = 0 - -def get_riset(data_type: str, name: str, conn: Any): - """Get the groups given the data type and it's PublishFreeze or GenoFreeze -name - - """ - query, _name, _id = None, None, None - if data_type == "Publish": - query = ("SELECT InbredSet.Name, InbredSet.Id FROM InbredSet, " - "PublishFreeze WHERE PublishFreeze.InbredSetId = " - "InbredSet.Id AND PublishFreeze.Name = '%s'" % name) - elif data_type == "Geno": - query = ("SELECT InbredSet.Name, InbredSet.Id FROM InbredSet, " - "GenoFreeze WHERE GenoFreeze.InbredSetId = " - "InbredSet.Id AND GenoFreeze.Name = '%s'" % name) - elif data_type == "ProbeSet": - query = ("SELECT InbredSet.Name, InbredSet.Id FROM " - "InbredSet, ProbeSetFreeze, ProbeFreeze WHERE " - "ProbeFreeze.InbredSetId = InbredSet.Id AND " - "ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND " - "ProbeSetFreeze.Name = '%s'" % name) - if query: - with conn.cursor() as cursor: - _name, _id = cursor.fetchone() - if _name == "BXD300": - _name = "BXD" - return Riset(_name, _id) - - -def insert_publication(pubmed_id: int, publication: Optional[Dict], - conn: Any): - """Creates a new publication record if it's not available""" - sql = ("SELECT Id FROM Publication where " - "PubMed_ID = %d" % pubmed_id) - _id = None with conn.cursor() as cursor: - cursor.execute(sql) - _id = cursor.fetchone() - if not _id and publication: - # The Publication contains the fields: 'authors', 'title', 'abstract', - # 'journal','volume','pages','month','year' - insert_query = ("INSERT into Publication (%s) Values (%s)" % - (", ".join(publication.keys()), - ", ".join(['%s'] * len(publication)))) - with conn.cursor() as cursor: - cursor.execute(insert_query, tuple(publication.values())) + # Update the Strains table + cursor.execute(STRAIN_ID_SQL, (strain_name, strain_id)) + updated_strains: int = cursor.rowcount + # Update the PublishData table + cursor.execute(PUBLISH_DATA_SQL, + (None if value == "x" else value, + strain_id, publish_data_id)) + updated_published_data: int = cursor.rowcount + # Update the PublishSE table + cursor.execute(PUBLISH_SE_SQL, + (None if error == "x" else error, + strain_id, publish_data_id)) + updated_se_data: int = cursor.rowcount + # Update the NStrain table + cursor.execute(N_STRAIN_SQL, + (None if count == "x" else count, + strain_id, publish_data_id)) + updated_n_strains: int = cursor.rowcount + return (updated_strains, updated_published_data, + updated_se_data, updated_n_strains) + def retrieve_trait_dataset_name( trait_type: str, threshold: int, name: str, connection: Any): @@ -137,6 +123,7 @@ PUBLISH_TRAIT_INFO_QUERY = ( "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " "PublishFreeze.Id =%(trait_dataset_id)s") + def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Publish` traits. |