diff options
-rw-r--r-- | misc/gn_installation_notes.txt | 19 | ||||
-rwxr-xr-x | wqflask/base/data_set.py | 90 | ||||
-rwxr-xr-x | wqflask/base/trait.py | 12 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/correlation_function.py (renamed from wqflask/wqflask/correlation/correlationFunction.py) | 0 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/show_corr_results.py | 130 | ||||
-rw-r--r-- | wqflask/wqflask/search_results.py | 8 |
6 files changed, 181 insertions, 78 deletions
diff --git a/misc/gn_installation_notes.txt b/misc/gn_installation_notes.txt index 46441b39..7545a5b8 100644 --- a/misc/gn_installation_notes.txt +++ b/misc/gn_installation_notes.txt @@ -160,6 +160,25 @@ service mysql start Stop MySQL server: service mysql stop +Change root password: +mysql> UPDATE mysql.user SET Password=PASSWORD('your password') WHERE User='root'; + +Setup accounts in MySQL (first need to delete anonymous/non-root accounts): +#; use mysql; +#; select * from user; +#; delete from user where Host!="localhost"; +#; delete from user where User!="root"; +#; update user set Password = Password('yourpassword') where User='root'; +#; GRANT ALL ON *.* TO 'yourusername'@'%' IDENTIFIED BY 'yourpassword'; +#; select * from user; + +============================================ + +Check RSA key: +ssh-keygen -l -f /etc/ssh/ssh_host_rsa_key + +03:2c:d7:01:01:f0:31:3a:c8:df:e4:98:62:2c:59:d2 root@penguin (RSA) + ============================================ Using Yolk diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 03b24230..30221503 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -48,32 +48,67 @@ from MySQLdb import escape_string as escape from pprint import pformat as pf # Used by create_database to instantiate objects +# Each subclass will add to this DS_NAME_MAP = {} def create_dataset(dataset_name, dataset_type = None): - #print("dataset_name:", dataset_name) - + + print("dataset_type:", dataset_type) if not dataset_type: - query = """ - SELECT DBType.Name - FROM DBList, DBType - WHERE DBList.Name = '{}' and - DBType.Id = DBList.DBTypeId - """.format(escape(dataset_name)) - #print("query is: ", pf(query)) - dataset_type = g.db.execute(query).fetchone().Name + dataset_type = Dataset_Getter(dataset_name) + #dataset_type = get_dataset_type_from_json(dataset_name) - #dataset_type = cursor.fetchone()[0] - #print("[blubber] dataset_type:", pf(dataset_type)) + print("dataset_type is:", dataset_type) + #query = """ + # SELECT DBType.Name + # FROM DBList, DBType + # WHERE DBList.Name = '{}' and + # DBType.Id = DBList.DBTypeId + # """.format(escape(dataset_name)) + #dataset_type = g.db.execute(query).fetchone().Name - dataset_ob = DS_NAME_MAP[dataset_type] - #dataset_class = getattr(data_set, dataset_ob) - #print("dataset_ob:", dataset_ob) - #print("DS_NAME_MAP:", pf(DS_NAME_MAP)) + dataset_ob = DS_NAME_MAP[dataset_type] dataset_class = globals()[dataset_ob] return dataset_class(dataset_name) + +#def get_dataset_type_from_json(dataset_name): + +class Dataset_Types(object): + + def __init__(self): + self.datasets = {} + file_name = "wqflask/static/new/javascript/dataset_menu_structure.json" + with open(file_name, 'r') as fh: + data = json.load(fh) + + print("*" * 70) + for species in data['datasets']: + for group in data['datasets'][species]: + for dataset_type in data['datasets'][species][group]: + for dataset in data['datasets'][species][group][dataset_type]: + print("dataset is:", dataset) + + short_dataset_name = dataset[0] + if dataset_type == "Phenotypes": + new_type = "Publish" + elif dataset_type == "Genotypes": + new_type = "Geno" + else: + new_type = "ProbeSet" + self.datasets[short_dataset_name] = new_type + + def __call__(self, name): + return self.datasets[name] + +# Do the intensive work at startup one time only +Dataset_Getter = Dataset_Types() + +# +#print("Running at startup:", get_dataset_type_from_json("HBTRC-MLPFC_0611")) + + def create_datasets_list(): key = "all_datasets" result = Redis.get(key) @@ -212,7 +247,7 @@ class DatasetGroup(object): marker_class = Markers self.markers = marker_class(self.name) - + def get_f1_parent_strains(self): try: @@ -225,7 +260,7 @@ class DatasetGroup(object): self.f1list = [f1, f12] if maternal and paternal: self.parlist = [maternal, paternal] - + def read_genotype_file(self): '''Read genotype from .geno file instead of database''' #if self.group == 'BXD300': @@ -375,6 +410,9 @@ class PhenotypeDataSet(DataSet): DS_NAME_MAP['Publish'] = 'PhenotypeDataSet' def setup(self): + + print("IS A PHENOTYPEDATASET") + # Fields in the database table self.search_fields = ['Phenotype.Post_publication_description', 'Phenotype.Pre_publication_description', @@ -445,14 +483,24 @@ class PhenotypeDataSet(DataSet): def get_trait_info(self, trait_list, species = ''): for this_trait in trait_list: if not this_trait.haveinfo: - this_trait.retrieveInfo(QTL=1) + this_trait.retrieve_info(get_qtl_info=True) description = this_trait.post_publication_description + + #If the dataset is confidential and the user has access to confidential + #phenotype traits, then display the pre-publication description instead + #of the post-publication description if this_trait.confidential: continue # for now - if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): + + if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait( + privilege=self.privilege, + userName=self.userName, + authorized_users=this_trait.authorized_users): + description = this_trait.pre_publication_description - this_trait.description_display = unicode(description, "utf8") + + this_trait.description_display = description if not this_trait.year.isdigit(): this_trait.pubmed_text = "N/A" diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index db76ddea..6648047c 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -320,7 +320,11 @@ class GeneralTrait(object): #XZ: assign SQL query result to trait attributes. for i, field in enumerate(self.dataset.display_fields): print(" mike: {} -> {} - {}".format(field, type(trait_info[i]), trait_info[i])) - setattr(self, field, trait_info[i]) + holder = trait_info[i] + if isinstance(trait_info[i], basestring): + print("is basestring") + holder = unicode(trait_info[i], "utf8") + setattr(self, field, holder) if self.dataset.type == 'Publish': self.confidential = 0 @@ -329,9 +333,9 @@ class GeneralTrait(object): self.homologeneid = None - print("self.geneid is:", self.geneid) - print(" type:", type(self.geneid)) - print("self.dataset.group.name is:", self.dataset.group.name) + #print("self.geneid is:", self.geneid) + #print(" type:", type(self.geneid)) + #print("self.dataset.group.name is:", self.dataset.group.name) if self.dataset.type == 'ProbeSet' and self.dataset.group and self.geneid: #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number. #XZ: So I have to test if geneid is number before execute the query. diff --git a/wqflask/wqflask/correlation/correlationFunction.py b/wqflask/wqflask/correlation/correlation_function.py index 7d4b58a9..7d4b58a9 100644 --- a/wqflask/wqflask/correlation/correlationFunction.py +++ b/wqflask/wqflask/correlation/correlation_function.py diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 4a0937bb..422fa8af 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -52,6 +52,8 @@ import utility.webqtlUtil #this is for parallel computing only. from wqflask.correlation import correlationFunction from utility.benchmark import Bench +from MySQLdb import escape_string as escape + from pprint import pformat as pf METHOD_SAMPLE_PEARSON = "1" @@ -101,13 +103,14 @@ class CorrelationResults(object): self.sample_data = {} self.corr_method = start_vars['corr_sample_method'] + self.return_number = 500 #The two if statements below append samples to the sample list based upon whether the user #rselected Primary Samples Only, Other Samples Only, or All Samples primary_samples = (self.dataset.group.parlist + - self.dataset.group.f1list + - self.dataset.group.samplelist) + self.dataset.group.f1list + + self.dataset.group.samplelist) #If either BXD/whatever Only or All Samples, append all of that group's samplelist if corr_samples_group != 'samples_other': @@ -153,13 +156,15 @@ class CorrelationResults(object): #self.correlation_data_slice = collections.OrderedDict() - for trait_counter, trait in enumerate(self.correlation_data.keys()[:300]): + for trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]): trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True) trait_object.sample_r = self.correlation_data[trait][0] trait_object.sample_p = self.correlation_data[trait][1] - trait_object_num_overlap = self.correlation_data[trait][2] + trait_object.num_overlap = self.correlation_data[trait][2] self.correlation_results.append(trait_object) + + #self.correlation_data_slice[trait] = self.correlation_data[trait] #self.correlation_data_slice[trait].append(trait_object) #if self.dataset.type == 'ProbeSet': @@ -203,11 +208,6 @@ class CorrelationResults(object): # mb = trait_object.mb # ) - #trait_list = self.getTissueCorrelationByList( primary_trait_symbol = self.this_trait.symbol, - # corr_results = self.correlation_results, - # TissueProbeSetFreezeId = 1, - # method=1) - #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset #self.target_db_name = start_vars['corr_dataset'] @@ -529,9 +529,13 @@ class CorrelationResults(object): #XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2 #XZ, 09/24/2008: Note that the correlation value can be negative. - def getTempTissueCorrTable(self, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber=0): + def get_temp_tissue_corr_table(self, + tissue_probesetfreeze_id=0, + method="", + return_number=0): + - def cmpTissCorrAbsoluteValue(A, B): + def cmp_tisscorr_absolute_value(A, B): try: if abs(A[1]) < abs(B[1]): return 1 elif abs(A[1]) == abs(B[1]): @@ -540,26 +544,27 @@ class CorrelationResults(object): except: return 0 - symbolCorrDict, symbolPvalueDict = self.calculateCorrOfAllTissueTrait(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method) + symbol_corr_dict, symbol_pvalue_dict = self.calculate_corr_for_all_tissues( + tissue_dataset_id=TISSUE_MOUSE_DB) - symbolCorrList = symbolCorrDict.items() + symbol_corr_list = symbol_corr_dict.items() - symbolCorrList.sort(cmpTissCorrAbsoluteValue) - symbolCorrList = symbolCorrList[0 : 2*returnNumber] + symbol_corr_list.sort(cmp_tisscorr_absolute_value) + symbol_corr_list = symbol_corr_list[0 : 2*return_number] - tmpTableName = webqtlUtil.genRandStr(prefix="TOPTISSUE") + tmp_table_name = webqtlUtil.genRandStr(prefix="TOPTISSUE") - q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmpTableName + q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmp_table_name self.cursor.execute(q1) - for one_pair in symbolCorrList: + for one_pair in symbol_corr_list: one_symbol = one_pair[0] one_corr = one_pair[1] - one_p_value = symbolPvalueDict[one_symbol] + one_p_value = symbol_pvalue_dict[one_symbol] self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) ) - return tmpTableName + return tmp_table_name #XZ, 01/09/2009: This function was created by David Crowell. Xiaodong cleaned up and modified it. @@ -589,37 +594,50 @@ class CorrelationResults(object): return litCorrDict + def fetch_tissue_correlations(self): + """Comments Possibly Out of Date!!!!! + + + Uses getTempTissueCorrTable to generate table of tissue correlations + + This function then gathers that data and pairs it with the TraitID string. + Takes as its arguments a formdata instance, and a database instance. + Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue) + for the requested correlation + + Used when the user selects the tissue correlation method; i.e. not for the + column that is appended to all probeset trait correlation tables + + """ - #XZ, 01/09/2009: Xiaodong created this function. - def fetchTissueCorrelations(self, db, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber = 0): - """Uses getTempTissueCorrTable to generate table of tissue correlations. This function then gathers that data and - pairs it with the TraitID string. Takes as its arguments a formdata instance, and a database instance. - Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue) for the requested correlation""" - - - tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method, returnNumber=returnNumber) + # table name string + temp_table = self.get_temp_tissue_corr_table(tissue_probesetfreeze_id=TISSUE_MOUSE_DB, + method=method) - query = "SELECT ProbeSet.Name, %s.Correlation, %s.PValue" % (tempTable, tempTable) - query += ' FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)' - query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable) - query += "WHERE ProbeSetFreeze.Name = '%s' and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL" % (db.name, tempTable) + query = """SELECT ProbeSet.Name, {}.Correlation, {}.PValue + FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) + LEFT JOIN {} ON {}.Symbol=ProbeSet.Symbol + WHERE ProbeSetFreeze.Name = '{}' + and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId + and ProbeSet.Id = ProbeSetXRef.ProbeSetId + and ProbeSet.Symbol IS NOT NULL + and {}.Correlation IS NOT NULL""".format(dataset.mescape( + temp_table, temp_table, temp_table, temp_table, + self.dataset.name, temp_table)) - self.cursor.execute(query) - results = self.cursor.fetchall() + results = g.db.execute(query).fetchall() - tissueCorrDict = {} + tissue_corr_dict = {} for entry in results: - traitName, tissueCorr, tissuePValue = entry - tissueCorrDict[traitName] = (tissueCorr, tissuePValue) + trait_name, tissue_corr, tissue_pvalue = entry + tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue) - self.cursor.execute('DROP TEMPORARY TABLE %s' % tempTable) - - return tissueCorrDict + g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table))) + return tissue_corr_dict - #XZ, 01/13/2008 def getLiteratureCorrelationByList(self, input_trait_mouse_geneid=None, species=None, traitList=None): tmpTableName = webqtlUtil.genRandStr(prefix="LITERATURE") @@ -670,7 +688,7 @@ class CorrelationResults(object): use_tissue_corr = False if self.method in TISSUE_METHODS: - tissue_corrs = self.fetchTissueCorrelations(db=self.db, primaryTraitSymbol=self.trait_symbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=self.method, returnNumber = self.returnNumber) + tissue_corrs = self.fetch_tissue_correlations(method=self.method, return_number = self.return_number) use_tissue_corr = True DatabaseFileName = self.getFileName( target_db_name=self.target_db_name ) @@ -895,20 +913,28 @@ class CorrelationResults(object): return trait_list """ - def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, TissueProbeSetFreezeId=None, method=None): + def calculate_corr_for_all_tissues(self, tissue_dataset_id=None): - symbolCorrDict = {} - symbolPvalueDict = {} + symbol_corr_dict = {} + symbol_pvalue_dict = {} - primaryTraitSymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[primaryTraitSymbol], TissueProbeSetFreezeId=TISSUE_MOUSE_DB) - primaryTraitValue = primaryTraitSymbolValueDict.values()[0] + primary_trait_symbol_value_dict = correlation_function.make_gene_tissue_value_dict( + GeneNameLst=[self.this_trait.symbol], + TissueProbeSetFreezeId=tissue_dataset_id) + primary_trait_value = primary_trait_symbol_value_dict.values()[0] - SymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[], TissueProbeSetFreezeId=TISSUE_MOUSE_DB) + symbol_value_dict = correlation_function.make_gene_tissue_value_dict( + gene_name_list=[], + tissue_dataset_id=tissue_dataset_id) - if method in ["2","5"]: - symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict,method='spearman') - else: - symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict) + symbol_corr_dict, symbol_pvalue_dict = correlation_function.batch_cal_tissue_corr( + primaryTraitValue, + SymbolValueDict, + method=self.corr_method) + #else: + # symbol_corr_dict, symbol_pvalue_dict = correlation_function.batch_cal_tissue_corr( + # primaryTraitValue, + # SymbolValueDict) return (symbolCorrDict, symbolPvalueDict) diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index 504a67ce..702b646e 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -79,7 +79,13 @@ class SearchResultPage(object): print("kw is:", kw) #self.quick_search = False self.search_terms = kw['search_terms'] - self.dataset = create_dataset(kw['dataset']) + if kw['type'] == "Phenotypes": + dataset_type = "Publish" + elif kw['type'] == "Genotypes": + dataset_type = "Geno" + else: + dataset_type = "ProbeSet" + self.dataset = create_dataset(kw['dataset'], dataset_type) self.search() self.gen_search_result() |