aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
author Zachary Sloan 2013-07-18 15:06:40 -0500
committer Zachary Sloan 2013-07-18 15:06:40 -0500
commit 89c4dc078c6371e4bc56bf8708fc7dca6c49b350 (patch)
tree 2c6fa9e5c78fa3252b52c537a9cb24c2f1437a44 /wqflask
parent d9065d1b94c3305f06c4ee368d4b0850b3a96e21 (diff)
parent 1a3a456eb7635337a966913f2e2e75c8a89bb92e (diff)
download genenetwork2-89c4dc078c6371e4bc56bf8708fc7dca6c49b350.tar.gz
Merge branch 'master' of https://github.com/zsloan/genenetwork
Conflicts: wqflask/base/webqtlConfigLocal.py wqflask/maintenance/gen_select_dataset.py wqflask/wqflask/static/new/javascript/dataset_menu_structure.json
Diffstat (limited to 'wqflask')
-rwxr-xr-x wqflask/base/data_set.py 90
-rwxr-xr-x wqflask/base/trait.py 12
-rw-r--r-- wqflask/maintenance/gen_select_dataset.py 13
-rw-r--r-- wqflask/wqflask/correlation/correlation_function.py (renamed from wqflask/wqflask/correlation/correlationFunction.py) 0
-rw-r--r-- wqflask/wqflask/correlation/show_corr_results.py 130
-rw-r--r-- wqflask/wqflask/search_results.py 8
-rw-r--r-- wqflask/wqflask/static/new/javascript/dataset_menu_structure.json 24
7 files changed, 186 insertions, 91 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 03b24230..30221503 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -48,32 +48,67 @@ from MySQLdb import escape_string as escape
from pprint import pformat as pf
# Used by create_dataset to instantiate objects
+# Each subclass will add to this
DS_NAME_MAP = {}
def create_dataset(dataset_name, dataset_type = None):
- #print("dataset_name:", dataset_name)
-
+
+ print("dataset_type:", dataset_type)
if not dataset_type:
- query = """
- SELECT DBType.Name
- FROM DBList, DBType
- WHERE DBList.Name = '{}' and
- DBType.Id = DBList.DBTypeId
- """.format(escape(dataset_name))
- #print("query is: ", pf(query))
- dataset_type = g.db.execute(query).fetchone().Name
+ dataset_type = Dataset_Getter(dataset_name)
+ #dataset_type = get_dataset_type_from_json(dataset_name)
- #dataset_type = cursor.fetchone()[0]
- #print("[blubber] dataset_type:", pf(dataset_type))
+ print("dataset_type is:", dataset_type)
+ #query = """
+ # SELECT DBType.Name
+ # FROM DBList, DBType
+ # WHERE DBList.Name = '{}' and
+ # DBType.Id = DBList.DBTypeId
+ # """.format(escape(dataset_name))
+ #dataset_type = g.db.execute(query).fetchone().Name
- dataset_ob = DS_NAME_MAP[dataset_type]
- #dataset_class = getattr(data_set, dataset_ob)
- #print("dataset_ob:", dataset_ob)
- #print("DS_NAME_MAP:", pf(DS_NAME_MAP))
+ dataset_ob = DS_NAME_MAP[dataset_type]
dataset_class = globals()[dataset_ob]
return dataset_class(dataset_name)
+
+#def get_dataset_type_from_json(dataset_name):
+
+class Dataset_Types(object):
+
+ def __init__(self):
+ self.datasets = {}
+ file_name = "wqflask/static/new/javascript/dataset_menu_structure.json"
+ with open(file_name, 'r') as fh:
+ data = json.load(fh)
+
+ print("*" * 70)
+ for species in data['datasets']:
+ for group in data['datasets'][species]:
+ for dataset_type in data['datasets'][species][group]:
+ for dataset in data['datasets'][species][group][dataset_type]:
+ print("dataset is:", dataset)
+
+ short_dataset_name = dataset[0]
+ if dataset_type == "Phenotypes":
+ new_type = "Publish"
+ elif dataset_type == "Genotypes":
+ new_type = "Geno"
+ else:
+ new_type = "ProbeSet"
+ self.datasets[short_dataset_name] = new_type
+
+ def __call__(self, name):
+ return self.datasets[name]
+
+# Do the intensive work at startup one time only
+Dataset_Getter = Dataset_Types()
+
+#
+#print("Running at startup:", get_dataset_type_from_json("HBTRC-MLPFC_0611"))
+
+
def create_datasets_list():
key = "all_datasets"
result = Redis.get(key)
@@ -212,7 +247,7 @@ class DatasetGroup(object):
marker_class = Markers
self.markers = marker_class(self.name)
-
+
def get_f1_parent_strains(self):
try:
@@ -225,7 +260,7 @@ class DatasetGroup(object):
self.f1list = [f1, f12]
if maternal and paternal:
self.parlist = [maternal, paternal]
-
+
def read_genotype_file(self):
'''Read genotype from .geno file instead of database'''
#if self.group == 'BXD300':
@@ -375,6 +410,9 @@ class PhenotypeDataSet(DataSet):
DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
def setup(self):
+
+ print("IS A PHENOTYPEDATASET")
+
# Fields in the database table
self.search_fields = ['Phenotype.Post_publication_description',
'Phenotype.Pre_publication_description',
@@ -445,14 +483,24 @@ class PhenotypeDataSet(DataSet):
def get_trait_info(self, trait_list, species = ''):
for this_trait in trait_list:
if not this_trait.haveinfo:
- this_trait.retrieveInfo(QTL=1)
+ this_trait.retrieve_info(get_qtl_info=True)
description = this_trait.post_publication_description
+
+ #If the dataset is confidential and the user has access to confidential
+ #phenotype traits, then display the pre-publication description instead
+ #of the post-publication description
if this_trait.confidential:
continue # for now
- if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users):
+
+ if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
+ privilege=self.privilege,
+ userName=self.userName,
+ authorized_users=this_trait.authorized_users):
+
description = this_trait.pre_publication_description
- this_trait.description_display = unicode(description, "utf8")
+
+ this_trait.description_display = description
if not this_trait.year.isdigit():
this_trait.pubmed_text = "N/A"
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index db76ddea..6648047c 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -320,7 +320,11 @@ class GeneralTrait(object):
#XZ: assign SQL query result to trait attributes.
for i, field in enumerate(self.dataset.display_fields):
print(" mike: {} -> {} - {}".format(field, type(trait_info[i]), trait_info[i]))
- setattr(self, field, trait_info[i])
+ holder = trait_info[i]
+ if isinstance(trait_info[i], basestring):
+ print("is basestring")
+ holder = unicode(trait_info[i], "utf8")
+ setattr(self, field, holder)
if self.dataset.type == 'Publish':
self.confidential = 0
@@ -329,9 +333,9 @@ class GeneralTrait(object):
self.homologeneid = None
- print("self.geneid is:", self.geneid)
- print(" type:", type(self.geneid))
- print("self.dataset.group.name is:", self.dataset.group.name)
+ #print("self.geneid is:", self.geneid)
+ #print(" type:", type(self.geneid))
+ #print("self.dataset.group.name is:", self.dataset.group.name)
if self.dataset.type == 'ProbeSet' and self.dataset.group and self.geneid:
#XZ, 05/26/2010: From time to time, this query gets an error message because some geneid values in the database are not numbers.
#XZ: So I have to test whether geneid is a number before executing the query.
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index acf21ed8..d653fd2d 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -47,15 +47,6 @@ import urlparse
from pprint import pformat as pf
-#Engine = sa.create_engine(our_settings.SQLALCHEMY_DATABASE_URI)
-
-# build MySql database connection
-
-#conn = Engine.connect()
-
-
-
-
def parse_db_uri(db_uri):
"""Converts a database URI to the db name, host name, user name, and password"""
@@ -170,9 +161,13 @@ def build_datasets(species, group, type_name):
def main():
"""Generates and outputs (as json file) the data for the main dropdown menus on the home page"""
+<<<<<<< HEAD
parse_db_uri(our_settings.DB_URI)
+=======
+
+>>>>>>> 1a3a456eb7635337a966913f2e2e75c8a89bb92e
species = get_species()
groups = get_groups(species)
types = get_types(groups)
diff --git a/wqflask/wqflask/correlation/correlationFunction.py b/wqflask/wqflask/correlation/correlation_function.py
index 7d4b58a9..7d4b58a9 100644
--- a/wqflask/wqflask/correlation/correlationFunction.py
+++ b/wqflask/wqflask/correlation/correlation_function.py
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 4a0937bb..422fa8af 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -52,6 +52,8 @@ import utility.webqtlUtil #this is for parallel computing only.
from wqflask.correlation import correlationFunction
from utility.benchmark import Bench
+from MySQLdb import escape_string as escape
+
from pprint import pformat as pf
METHOD_SAMPLE_PEARSON = "1"
@@ -101,13 +103,14 @@ class CorrelationResults(object):
self.sample_data = {}
self.corr_method = start_vars['corr_sample_method']
+ self.return_number = 500
#The two if statements below append samples to the sample list based upon whether the user
#selected Primary Samples Only, Other Samples Only, or All Samples
primary_samples = (self.dataset.group.parlist +
- self.dataset.group.f1list +
- self.dataset.group.samplelist)
+ self.dataset.group.f1list +
+ self.dataset.group.samplelist)
#If either BXD/whatever Only or All Samples, append all of that group's samplelist
if corr_samples_group != 'samples_other':
@@ -153,13 +156,15 @@ class CorrelationResults(object):
#self.correlation_data_slice = collections.OrderedDict()
- for trait_counter, trait in enumerate(self.correlation_data.keys()[:300]):
+ for trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]):
trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True)
trait_object.sample_r = self.correlation_data[trait][0]
trait_object.sample_p = self.correlation_data[trait][1]
- trait_object_num_overlap = self.correlation_data[trait][2]
+ trait_object.num_overlap = self.correlation_data[trait][2]
self.correlation_results.append(trait_object)
+
+
#self.correlation_data_slice[trait] = self.correlation_data[trait]
#self.correlation_data_slice[trait].append(trait_object)
#if self.dataset.type == 'ProbeSet':
@@ -203,11 +208,6 @@ class CorrelationResults(object):
# mb = trait_object.mb
# )
- #trait_list = self.getTissueCorrelationByList( primary_trait_symbol = self.this_trait.symbol,
- # corr_results = self.correlation_results,
- # TissueProbeSetFreezeId = 1,
- # method=1)
-
#XZ, 09/18/2008: get all information about the user selected database.
#target_db_name = fd.corr_dataset
#self.target_db_name = start_vars['corr_dataset']
@@ -529,9 +529,13 @@ class CorrelationResults(object):
#XZ, 09/23/2008: In tissue correlation tables, there is no record of GeneId1 == GeneId2
#XZ, 09/24/2008: Note that the correlation value can be negative.
- def getTempTissueCorrTable(self, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber=0):
+ def get_temp_tissue_corr_table(self,
+ tissue_probesetfreeze_id=0,
+ method="",
+ return_number=0):
+
- def cmpTissCorrAbsoluteValue(A, B):
+ def cmp_tisscorr_absolute_value(A, B):
try:
if abs(A[1]) < abs(B[1]): return 1
elif abs(A[1]) == abs(B[1]):
@@ -540,26 +544,27 @@ class CorrelationResults(object):
except:
return 0
- symbolCorrDict, symbolPvalueDict = self.calculateCorrOfAllTissueTrait(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method)
+ symbol_corr_dict, symbol_pvalue_dict = self.calculate_corr_for_all_tissues(
+ tissue_dataset_id=TISSUE_MOUSE_DB)
- symbolCorrList = symbolCorrDict.items()
+ symbol_corr_list = symbol_corr_dict.items()
- symbolCorrList.sort(cmpTissCorrAbsoluteValue)
- symbolCorrList = symbolCorrList[0 : 2*returnNumber]
+ symbol_corr_list.sort(cmp_tisscorr_absolute_value)
+ symbol_corr_list = symbol_corr_list[0 : 2*return_number]
- tmpTableName = webqtlUtil.genRandStr(prefix="TOPTISSUE")
+ tmp_table_name = webqtlUtil.genRandStr(prefix="TOPTISSUE")
- q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmpTableName
+ q1 = 'CREATE TEMPORARY TABLE %s (Symbol varchar(100) PRIMARY KEY, Correlation float, PValue float)' % tmp_table_name
self.cursor.execute(q1)
- for one_pair in symbolCorrList:
+ for one_pair in symbol_corr_list:
one_symbol = one_pair[0]
one_corr = one_pair[1]
- one_p_value = symbolPvalueDict[one_symbol]
+ one_p_value = symbol_pvalue_dict[one_symbol]
self.cursor.execute( "INSERT INTO %s (Symbol, Correlation, PValue) VALUES ('%s',%f,%f)" % (tmpTableName, one_symbol, float(one_corr), float(one_p_value)) )
- return tmpTableName
+ return tmp_table_name
#XZ, 01/09/2009: This function was created by David Crowell. Xiaodong cleaned up and modified it.
@@ -589,37 +594,50 @@ class CorrelationResults(object):
return litCorrDict
+ def fetch_tissue_correlations(self):
+ """Comments Possibly Out of Date!!!!!
+
+
+ Uses getTempTissueCorrTable to generate table of tissue correlations
+
+ This function then gathers that data and pairs it with the TraitID string.
+ Takes as its arguments a formdata instance, and a database instance.
+ Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue)
+ for the requested correlation
+
+ Used when the user selects the tissue correlation method; i.e. not for the
+ column that is appended to all probeset trait correlation tables
+
+ """
- #XZ, 01/09/2009: Xiaodong created this function.
- def fetchTissueCorrelations(self, db, primaryTraitSymbol="", TissueProbeSetFreezeId=0, method="", returnNumber = 0):
- """Uses getTempTissueCorrTable to generate table of tissue correlations. This function then gathers that data and
- pairs it with the TraitID string. Takes as its arguments a formdata instance, and a database instance.
- Returns a dictionary of 'TraitID':(tissueCorr, tissuePValue) for the requested correlation"""
-
-
- tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=primaryTraitSymbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=method, returnNumber=returnNumber)
+ # table name string
+ temp_table = self.get_temp_tissue_corr_table(tissue_probesetfreeze_id=TISSUE_MOUSE_DB,
+ method=method)
- query = "SELECT ProbeSet.Name, %s.Correlation, %s.PValue" % (tempTable, tempTable)
- query += ' FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)'
- query += ' LEFT JOIN %s ON %s.Symbol=ProbeSet.Symbol ' % (tempTable,tempTable)
- query += "WHERE ProbeSetFreeze.Name = '%s' and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSet.Symbol IS NOT NULL AND %s.Correlation IS NOT NULL" % (db.name, tempTable)
+ query = """SELECT ProbeSet.Name, {}.Correlation, {}.PValue
+ FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
+ LEFT JOIN {} ON {}.Symbol=ProbeSet.Symbol
+ WHERE ProbeSetFreeze.Name = '{}'
+ and ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ and ProbeSet.Symbol IS NOT NULL
+ and {}.Correlation IS NOT NULL""".format(dataset.mescape(
+ temp_table, temp_table, temp_table, temp_table,
+ self.dataset.name, temp_table))
- self.cursor.execute(query)
- results = self.cursor.fetchall()
+ results = g.db.execute(query).fetchall()
- tissueCorrDict = {}
+ tissue_corr_dict = {}
for entry in results:
- traitName, tissueCorr, tissuePValue = entry
- tissueCorrDict[traitName] = (tissueCorr, tissuePValue)
+ trait_name, tissue_corr, tissue_pvalue = entry
+ tissue_corr_dict[trait_name] = (tissue_corr, tissue_pvalue)
- self.cursor.execute('DROP TEMPORARY TABLE %s' % tempTable)
-
- return tissueCorrDict
+ g.db.execute('DROP TEMPORARY TABLE {}'.format(escape(temp_table)))
+ return tissue_corr_dict
- #XZ, 01/13/2008
def getLiteratureCorrelationByList(self, input_trait_mouse_geneid=None, species=None, traitList=None):
tmpTableName = webqtlUtil.genRandStr(prefix="LITERATURE")
@@ -670,7 +688,7 @@ class CorrelationResults(object):
use_tissue_corr = False
if self.method in TISSUE_METHODS:
- tissue_corrs = self.fetchTissueCorrelations(db=self.db, primaryTraitSymbol=self.trait_symbol, TissueProbeSetFreezeId=TISSUE_MOUSE_DB, method=self.method, returnNumber = self.returnNumber)
+ tissue_corrs = self.fetch_tissue_correlations(method=self.method, return_number = self.return_number)
use_tissue_corr = True
DatabaseFileName = self.getFileName( target_db_name=self.target_db_name )
@@ -895,20 +913,28 @@ class CorrelationResults(object):
return trait_list
"""
- def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, TissueProbeSetFreezeId=None, method=None):
+ def calculate_corr_for_all_tissues(self, tissue_dataset_id=None):
- symbolCorrDict = {}
- symbolPvalueDict = {}
+ symbol_corr_dict = {}
+ symbol_pvalue_dict = {}
- primaryTraitSymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[primaryTraitSymbol], TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
- primaryTraitValue = primaryTraitSymbolValueDict.values()[0]
+ primary_trait_symbol_value_dict = correlation_function.make_gene_tissue_value_dict(
+ GeneNameLst=[self.this_trait.symbol],
+ TissueProbeSetFreezeId=tissue_dataset_id)
+ primary_trait_value = primary_trait_symbol_value_dict.values()[0]
- SymbolValueDict = correlationFunction.getGeneSymbolTissueValueDictForTrait(cursor=self.cursor, GeneNameLst=[], TissueProbeSetFreezeId=TISSUE_MOUSE_DB)
+ symbol_value_dict = correlation_function.make_gene_tissue_value_dict(
+ gene_name_list=[],
+ tissue_dataset_id=tissue_dataset_id)
- if method in ["2","5"]:
- symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict,method='spearman')
- else:
- symbolCorrDict, symbolPvalueDict = correlationFunction.batchCalTissueCorr(primaryTraitValue,SymbolValueDict)
+ symbol_corr_dict, symbol_pvalue_dict = correlation_function.batch_cal_tissue_corr(
+ primaryTraitValue,
+ SymbolValueDict,
+ method=self.corr_method)
+ #else:
+ # symbol_corr_dict, symbol_pvalue_dict = correlation_function.batch_cal_tissue_corr(
+ # primaryTraitValue,
+ # SymbolValueDict)
return (symbolCorrDict, symbolPvalueDict)
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index 504a67ce..702b646e 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -79,7 +79,13 @@ class SearchResultPage(object):
print("kw is:", kw)
#self.quick_search = False
self.search_terms = kw['search_terms']
- self.dataset = create_dataset(kw['dataset'])
+ if kw['type'] == "Phenotypes":
+ dataset_type = "Publish"
+ elif kw['type'] == "Genotypes":
+ dataset_type = "Geno"
+ else:
+ dataset_type = "ProbeSet"
+ self.dataset = create_dataset(kw['dataset'], dataset_type)
self.search()
self.gen_search_result()
diff --git a/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json b/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json
index 4aae20ba..a4de53bd 100644
--- a/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json
+++ b/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json
@@ -798,6 +798,10 @@
"OHSU/VA B6D2F2 Striatum M430v2 (Sep05) RMA"
],
[
+ "SA_M2_0905_R",
+ "OHSU/VA B6D2F2 Striatum M430v2 (Sep05) RMA"
+ ],
+ [
"SA_M2_0905_M",
"OHSU/VA B6D2F2 Striatum M430v2 (Sep05) MAS5"
],
@@ -1048,6 +1052,10 @@
],
"Cerebellum mRNA": [
[
+ "CB_M_1004_M",
+ "SJUT Cerebellum mRNA M430 (Oct04) MAS5"
+ ],
+ [
"CB_M_1004_R",
"SJUT Cerebellum mRNA M430 (Oct04) RMA"
],
@@ -1056,10 +1064,6 @@
"SJUT Cerebellum mRNA M430 (Oct04) PDNN"
],
[
- "CB_M_1004_M",
- "SJUT Cerebellum mRNA M430 (Oct04) MAS5"
- ],
- [
"CB_M_1003_M",
"SJUT Cerebellum mRNA M430 (Oct03) MAS5"
],
@@ -1548,6 +1552,10 @@
],
"Prefrontal Cortex mRNA": [
[
+ "VCUEtOH_1206_R",
+ "VCU BXD PFC EtOH M430 2.0 (Dec06) RMA"
+ ],
+ [
"VCUSal_1206_R",
"VCU BXD PFC Sal M430 2.0 (Dec06) RMA"
],
@@ -1722,6 +1730,10 @@
"VCU BXD VTA EtOH M430 2.0 (Jun09) RMA **"
],
[
+ "VCUEtOH_0609_R",
+ "VCU BXD VTA EtOH M430 2.0 (Jun09) RMA **"
+ ],
+ [
"VCUSal_0609_R",
"VCU BXD VTA Sal M430 2.0 (Jun09) RMA **"
],
@@ -2006,6 +2018,10 @@
"VCU LXS PFC EtOH M430A 2.0 (Aug06) RMA **"
],
[
+ "VCUEtOH_0806_R",
+ "VCU LXS PFC EtOH M430A 2.0 (Aug06) RMA **"
+ ],
+ [
"VCUSal_0806_R",
"VCU LXS PFC Sal M430A 2.0 (Aug06) RMA"
],