6 files changed, 181 insertions, 78 deletions
diff --git a/misc/gn_installation_notes.txt b/misc/gn_installation_notes.txt
index 46441b39..7545a5b8 100644
--- a/misc/gn_installation_notes.txt
+++ b/misc/gn_installation_notes.txt
@@ -160,6 +160,25 @@ service mysql start
 Stop MySQL server:
 service mysql stop
 
+Change root password:
+mysql> UPDATE mysql.user SET Password=PASSWORD('your password') WHERE User='root'; FLUSH PRIVILEGES;
+
+Set up accounts in MySQL (first delete the anonymous and non-root accounts):
+#; use mysql;
+#; select * from user;
+#; delete from user where Host!="localhost";
+#; delete from user where User!="root";
+#; update user set Password = Password('yourpassword') where User='root';
+#; GRANT ALL ON *.* TO 'yourusername'@'%' IDENTIFIED BY 'yourpassword';
+#; select * from user;
+
+============================================
+
+Check RSA key:
+ssh-keygen -l -f /etc/ssh/ssh_host_rsa_key
+
+03:2c:d7:01:01:f0:31:3a:c8:df:e4:98:62:2c:59:d2  root@penguin (RSA)
+
 ============================================
 
 Using Yolk
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 03b24230..30221503 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -48,32 +48,67 @@ from MySQLdb import escape_string as escape
 from pprint import pformat as pf
 
 # Used by create_database to instantiate objects
+# Each subclass will add to this
 DS_NAME_MAP = {}
 
 def create_dataset(dataset_name, dataset_type = None):
-    #print("dataset_name:", dataset_name)
-
+    """Instantiate the dataset class registered for dataset_name.
+
+    dataset_name -- name handed to the dataset class constructor
+    dataset_type -- DS_NAME_MAP key such as 'Publish'; when omitted it is
+                    looked up from dataset_name with Dataset_Getter
+
+    Returns an instance of the class named by DS_NAME_MAP[dataset_type].
+    Raises KeyError if the name or the type is not registered.
+    """
     if not dataset_type:
-        query = """
-            SELECT DBType.Name
-            FROM DBList, DBType
-            WHERE DBList.Name = '{}' and
-                  DBType.Id = DBList.DBTypeId
-            """.format(escape(dataset_name))
-        #print("query is: ", pf(query))
-        dataset_type = g.db.execute(query).fetchone().Name
+        dataset_type = Dataset_Getter(dataset_name)
 
-    #dataset_type = cursor.fetchone()[0]
-    #print("[blubber] dataset_type:", pf(dataset_type))
 
-    dataset_ob = DS_NAME_MAP[dataset_type]
-    #dataset_class = getattr(data_set, dataset_ob)
-    #print("dataset_ob:", dataset_ob)
-    #print("DS_NAME_MAP:", pf(DS_NAME_MAP))
 
+    # DS_NAME_MAP stores class names as strings, so the class object is
+    # looked up by that name in this module's globals.
+    dataset_ob = DS_NAME_MAP[dataset_type]
     dataset_class = globals()[dataset_ob]
     return dataset_class(dataset_name)
 
+
+#def get_dataset_type_from_json(dataset_name):
+    
+class Dataset_Types(object):
+    """Map short dataset names to their DS_NAME_MAP type keys.
+
+    Built once from dataset_menu_structure.json, whose 'datasets' entry nests
+    species -> group -> menu type -> rows; row[0] is the short dataset name.
+    """
+
+    # Menu headings with a dedicated type; any other heading is a ProbeSet.
+    MENU_TYPE_MAP = {"Phenotypes": "Publish", "Genotypes": "Geno"}
+
+    def __init__(self):
+        self.datasets = {}
+        file_name = "wqflask/static/new/javascript/dataset_menu_structure.json"
+        with open(file_name, 'r') as fh:
+            data = json.load(fh)
+
+        for groups in data['datasets'].values():
+            for menu_types in groups.values():
+                for dataset_type, datasets in menu_types.items():
+                    new_type = self.MENU_TYPE_MAP.get(dataset_type, "ProbeSet")
+                    for dataset in datasets:
+                        self.datasets[dataset[0]] = new_type
+
+    def __call__(self, name):
+        """Return the type key for name; raises KeyError if it is unknown."""
+        return self.datasets[name]
+    
+# Do the intensive work at startup one time only
+Dataset_Getter = Dataset_Types()
+
+#
+#print("Running at startup:", get_dataset_type_from_json("HBTRC-MLPFC_0611"))
+                    
+
 def create_datasets_list():
     key = "all_datasets"
     result = Redis.get(key)
@@ -212,7 +247,7 @@ class DatasetGroup(object):
             marker_class = Markers
 
         self.markers = marker_class(self.name)
-        
+
 
     def get_f1_parent_strains(self):
         try:
@@ -225,7 +260,7 @@ class DatasetGroup(object):
             self.f1list = [f1, f12]
         if maternal and paternal:
             self.parlist = [maternal, paternal]
-            
+
     def read_genotype_file(self):
         '''Read genotype from .geno file instead of database'''
         #if self.group == 'BXD300':
@@ -375,6 +410,9 @@ class PhenotypeDataSet(DataSet):
     DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
 
     def setup(self):
+        
+        print("IS A PHENOTYPEDATASET")
+        
         # Fields in the database table
         self.search_fields = ['Phenotype.Post_publication_description',
                             'Phenotype.Pre_publication_description',
@@ -445,14 +483,24 @@ class PhenotypeDataSet(DataSet):
     def get_trait_info(self, trait_list, species = ''):
         for this_trait in trait_list:
             if not this_trait.haveinfo:
-                this_trait.retrieveInfo(QTL=1)
+                this_trait.retrieve_info(get_qtl_info=True)
 
             description = this_trait.post_publication_description
+            
+            #If the dataset is confidential and the user has access to confidential
+            #phenotype traits, then display the pre-publication description instead
+            #of the post-publication description
             if this_trait.confidential:
                 continue   # for now
-                if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users):
+            
+                if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
+                        privilege=self.privilege,
+                        userName=self.userName,
+                        authorized_users=this_trait.authorized_users):
+                        
                     description = this_trait.pre_publication_description
-            this_trait.description_display = unicode(description, "utf8")
+            
+            this_trait.description_display = description
 
             if not this_trait.year.isdigit():
                 this_trait.pubmed_text = "N/A"
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index db76ddea..6648047c 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -320,7 +320,11 @@ class GeneralTrait(object):
             #XZ: assign SQL query result to trait attributes.
             for i, field in enumerate(self.dataset.display_fields):
                 print("  mike: {} -> {} - {}".format(field, type(trait_info[i]), trait_info[i]))
-                setattr(self, field, trait_info[i])
+                holder = trait_info[i]
+                # Only byte strings need decoding; unicode() rejects unicode input.
+                if isinstance(holder, str):
+                    holder = unicode(holder, "utf8")
+                setattr(self, field, holder)
 
             if self.dataset.type == 'Publish':
                 self.confidential = 0
@@ -329,9 +333,9 @@ class GeneralTrait(object):
 
             self.homologeneid = None
             
-            print("self.geneid is:", self.geneid)
-            print("  type:", type(self.geneid))
-            print("self.dataset.group.name is:", self.dataset.group.name)
+            #print("self.geneid is:", self.geneid)
+            #print("  type:", type(self.geneid))
+            #print("self.dataset.group.name is:", self.dataset.group.name)
             if self.dataset.type == 'ProbeSet' and self.dataset.group and self.geneid:
                 #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
                 #XZ: So I have to test if geneid is number before execute the query.
diff --git a/wqflask/wqflask/correlation/correlationFunction.py b/wqflask/wqflask/correlation/correlation_function.py
index 7d4b58a9..7d4b58a9 100644
--- a/wqflask/wqflask/correlation/correlationFunction.py
+++ b/wqflask/wqflask/correlation/correlation_function.py
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 4a0937bb..422fa8af 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -52,6 +52,8 @@ import utility.webqtlUtil #this is for parallel computing only.
 from wqflask.correlation import correlationFunction
 from utility.benchmark import Bench
 
+from MySQLdb import escape_string as escape
+
 from pprint import pformat as pf
 
 METHOD_SAMPLE_PEARSON = "1"
@@ -101,13 +103,14 @@ class CorrelationResults(object):
 
             self.sample_data = {}
             self.corr_method = start_vars['corr_sample_method']
+            self.return_number = 500
 
             #The two if statements below append samples to the sample list based upon whether the user
             #rselected Primary Samples Only, Other Samples Only, or All Samples
 
             primary_samples = (self.dataset.group.parlist +
-                                   self.dataset.group.f1list +
-                                   self.dataset.group.samplelist)
+                               self.dataset.group.f1list +
+                               self.dataset.group.samplelist)
 
             #If either BXD/whatever Only or All Samples, append all of that group's samplelist      
             if corr_samples_group != 'samples_other':
@@ -153,13 +156,15 @@ class CorrelationResults(object):
 
             #self.correlation_data_slice = collections.OrderedDict()
 
-            for trait_counter, trait in enumerate(self.correlation_data.keys()[:300]):
+            for trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]):
                 trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True)
                 trait_object.sample_r = self.correlation_data[trait][0]
                 trait_object.sample_p = self.correlation_data[trait][1]
-                trait_object_num_overlap = self.correlation_data[trait][2]
+                trait_object.num_overlap = self.correlation_data[trait][2]
                 self.correlation_results.append(trait_object)
                 
+            
+                
                 #self.correlation_data_slice[trait] = self.correlation_data[trait]
                 #self.correlation_data_slice[trait].append(trait_object)
                 #if self.dataset.type == 'ProbeSet':
@@ -203,11 +208,6 @@ class CorrelationResults(object):
                 #        mb = trait_object.mb
                 #    )
 
-            #trait_list = self.getTissueCorrelationByList( primary_trait_symbol = self.this_trait.symbol,
-            #                                            corr_results = self.correlation_results,
-            #                                            TissueProbeSetFreezeId = 1,
-            #                                            method=1)
-
         #XZ, 09/18/2008: get all information about the user selected database.
         #target_db_name = fd.corr_dataset
         #self.target_db_name = start_vars['corr_dataset']
@@ -529,9 +529,13 @@ class CorrelationResults(object):
 
     #XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2
     #XZ, 09/24/2008: Note that the correlation value can be negative.
-    def getTempTissueCorrTable(self, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber=0):
+    def get_temp_tissue_corr_table(self,
+                                   tissue_probesetfreeze_id=0,
+                                   method="",
+                                   return_number=0):
+        
 
-        def cmpTissCorrAbsoluteValue(A, B):
+        def cmp_tisscorr_absolute_value(A, B):
             try:
                 if abs(A[1]) < abs(B[1]): return 1
                 elif abs(A[1]) == abs(B[1]):
@@ -540,26 +544,27 @@ class CorrelationResults(object):
             except:
                 return 0
 
-        symbolCorrDict, symbolPvalueDict = self.calculateCorrOfAllTissueTrait(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method)
+        symbol_corr_dict, symbol_pvalue_dict = self.calculate_corr_for_all_tissues(
+                                                                tissue_dataset_id=TISSUE_MOUSE_DB)
 
-        symbolCorrList = symbolCorrDict.items()
+        symbol_corr_list = symbol_corr_dict.items()
 
-        symbolCorrList.sort(cmpTissCorrAbsoluteValue)
-        symbolCorrList = symbolCorrList[0 : 2*returnNumber]
+        symbol_corr_list.sort(cmp_tisscorr_absolute_value)
+        symbol_corr_list = symbol_corr_list[0 : 2*return_number]
 
-        tmpTableName = webqtlUtil.genRandStr(prefix="TOPTISSUE")
+        tmp_table_name = webqtlUtil.genRandStr(prefix="TOPTISSUE")
 
-        q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmpTableName
+        q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmp_table_name
         self.cursor.execute(q1)
 
-        for one_pair in symbolCorrList:
+        for one_pair in symbol_corr_list:
             one_symbol = one_pair[0]
             one_corr = one_pair[1]
-            one_p_value = symbolPvalueDict[one_symbol]
+            one_p_value = symbol_pvalue_dict[one_symbol]
 
             self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) )
 
-        return tmpTableName
+        return tmp_table_name
 
 
     #XZ, 01/09/2009: This function was created by David Crowell. Xiaodong cleaned up and modified it.
@@ -589,37 +594,50 @@ class CorrelationResults(object):
         return litCorrDict
 
 
+    def fetch_tissue_correlations(self, method="", return_number=0):
+        """Return tissue correlations keyed by ProbeSet name.
+
+        Uses get_temp_tissue_corr_table to build a temporary table of symbol
+        correlations, then pairs each row with the ProbeSet names of
+        self.dataset that share the symbol.
+
+        method, return_number -- passed through to get_temp_tissue_corr_table
+
+        Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue)
+        for the requested correlation
+
+        Used when the user selects the tissue correlation method; i.e. not for the
+        column that is appended to all probeset trait correlation tables
+        """
 
-    #XZ, 01/09/2009: Xiaodong created this function.
-    def fetchTissueCorrelations(self, db, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber = 0):
-        """Uses getTempTissueCorrTable to generate table of tissue correlations.  This function then gathers that data and
-        pairs it with the TraitID string.  Takes as its arguments a formdata instance, and a database instance.
-        Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue) for the requested correlation"""
-
-
-        tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method, returnNumber=returnNumber)
+        temp_table = self.get_temp_tissue_corr_table(tissue_probesetfreeze_id=TISSUE_MOUSE_DB,
+                                                     method=method,
+                                                     return_number=return_number)
 
-        query = "SELECT ProbeSet.Name, %s.Correlation, %s.PValue" %  (tempTable, tempTable)
-        query += ' FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)'
-        query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable)
-        query += "WHERE ProbeSetFreeze.Name = '%s' and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL" % (db.name, tempTable)
+        query = """SELECT ProbeSet.Name, {tbl}.Correlation, {tbl}.PValue
+                FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
+                LEFT JOIN {tbl} ON {tbl}.Symbol=ProbeSet.Symbol
+                WHERE ProbeSetFreeze.Name = '{name}'
+                and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId
+                and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+                and ProbeSet.Symbol IS NOT NULL
+                and {tbl}.Correlation IS NOT NULL""".format(
+                    tbl=escape(temp_table),
+                    name=escape(self.dataset.name))
 
-        self.cursor.execute(query)
-        results = self.cursor.fetchall()
+        results = g.db.execute(query).fetchall()
 
-        tissueCorrDict = {}
+        tissue_corr_dict = {}
 
         for entry in results:
-            traitName, tissueCorr, tissuePValue = entry
-            tissueCorrDict[traitName] = (tissueCorr, tissuePValue)
+            trait_name, tissue_corr, tissue_pvalue = entry
+            tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue)
 
-        self.cursor.execute('DROP TEMPORARY TABLE %s' % tempTable)
-
-        return tissueCorrDict
+        g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table)))
 
+        return tissue_corr_dict
 
 
-    #XZ, 01/13/2008
     def getLiteratureCorrelationByList(self, input_trait_mouse_geneid=None, species=None, traitList=None):
 
         tmpTableName = webqtlUtil.genRandStr(prefix="LITERATURE")
@@ -670,7 +688,7 @@ class CorrelationResults(object):
 
             use_tissue_corr = False
             if self.method in TISSUE_METHODS:
-                tissue_corrs = self.fetchTissueCorrelations(db=self.db, primaryTraitSymbol=self.trait_symbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=self.method, returnNumber = self.returnNumber)
+                tissue_corrs = self.fetch_tissue_correlations(method=self.method, return_number = self.return_number)
                 use_tissue_corr = True
 
             DatabaseFileName = self.getFileName( target_db_name=self.target_db_name )
@@ -895,20 +913,28 @@ class CorrelationResults(object):
         return trait_list
         """
 
-    def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, TissueProbeSetFreezeId=None, method=None):
+    def calculate_corr_for_all_tissues(self, tissue_dataset_id=None):
+        """Compute tissue correlations and p-values for self.this_trait's symbol.
+
+        Returns a (symbol_corr_dict, symbol_pvalue_dict) tuple.
+        """
 
-        symbolCorrDict = {}
-        symbolPvalueDict = {}
+        symbol_corr_dict = {}
+        symbol_pvalue_dict = {}
 
-        primaryTraitSymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[primaryTraitSymbol], TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
-        primaryTraitValue = primaryTraitSymbolValueDict.values()[0]
+        primary_trait_symbol_value_dict = correlation_function.make_gene_tissue_value_dict(
+                                                    gene_name_list=[self.this_trait.symbol],
+                                                    tissue_dataset_id=tissue_dataset_id)
+        primary_trait_value = primary_trait_symbol_value_dict.values()[0]
 
-        SymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[], TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
+        symbol_value_dict = correlation_function.make_gene_tissue_value_dict(
+                                        gene_name_list=[],
+                                        tissue_dataset_id=tissue_dataset_id)
 
-        if method in ["2","5"]:
-            symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict,method='spearman')
-        else:
-            symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict)
+        symbol_corr_dict, symbol_pvalue_dict = correlation_function.batch_cal_tissue_corr(
+                primary_trait_value,
+                symbol_value_dict,
+                method=self.corr_method)
 
 
-        return (symbolCorrDict, symbolPvalueDict)
+        return (symbol_corr_dict, symbol_pvalue_dict)
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index 504a67ce..702b646e 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -79,7 +79,13 @@ class SearchResultPage(object):
             print("kw is:", kw)
             #self.quick_search = False
             self.search_terms = kw['search_terms']
-            self.dataset = create_dataset(kw['dataset'])
+            if kw['type'] == "Phenotypes":
+                dataset_type = "Publish"
+            elif kw['type'] == "Genotypes":
+                dataset_type = "Geno"
+            else:
+                dataset_type = "ProbeSet"
+            self.dataset = create_dataset(kw['dataset'], dataset_type)
             self.search()
             self.gen_search_result()