about summary refs log tree commit diff
path: root/wqflask
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask')
-rwxr-xr-x wqflask/base/data_set.py 1
-rwxr-xr-x wqflask/base/trait.py 42
-rw-r--r-- wqflask/utility/helper_functions.py 15
-rw-r--r-- wqflask/wqflask/correlation/show_corr_results.py (renamed from wqflask/wqflask/correlation/CorrelationPage.py) 237
-rwxr-xr-x wqflask/wqflask/marker_regression/marker_regression.py 13
-rwxr-xr-x wqflask/wqflask/show_trait/show_trait.py 35
-rw-r--r-- wqflask/wqflask/views.py 6
7 files changed, 193 insertions, 156 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 50ef8f57..7088913c 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -741,3 +741,4 @@ def geno_mrna_confidentiality(ob):
if confidential:
# Allow confidential data later
NoConfindetialDataForYouTodaySorry
+
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 241bf2ab..2af4bc24 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -314,27 +314,27 @@ class GeneralTrait:
#XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
#XZ: So I have to test if geneid is number before execute the query.
#XZ: The geneid values in database should be cleaned up.
- try:
- junk = float(self.geneid)
- geneidIsNumber = 1
- except:
- geneidIsNumber = 0
-
- if geneidIsNumber:
- query = """
- SELECT
- HomologeneId
- FROM
- Homologene, Species, InbredSet
- WHERE
- Homologene.GeneId =%s AND
- InbredSet.Name = '%s' AND
- InbredSet.SpeciesId = Species.Id AND
- Species.TaxonomyId = Homologene.TaxonomyId
- """ % (escape(str(self.geneid)), escape(self.dataset.group.name))
- result = g.db.execute(query).fetchone()
- else:
- result = None
+ #try:
+ # float(self.geneid)
+ # geneidIsNumber = True
+ #except ValueError:
+ # geneidIsNumber = False
+
+ #if geneidIsNumber:
+ query = """
+ SELECT
+ HomologeneId
+ FROM
+ Homologene, Species, InbredSet
+ WHERE
+ Homologene.GeneId =%s AND
+ InbredSet.Name = '%s' AND
+ InbredSet.SpeciesId = Species.Id AND
+ Species.TaxonomyId = Homologene.TaxonomyId
+ """ % (escape(str(self.geneid)), escape(self.dataset.group.name))
+ result = g.db.execute(query).fetchone()
+ #else:
+ # result = None
if result:
self.homologeneid = result[0]
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
new file mode 100644
index 00000000..920d9ac6
--- /dev/null
+++ b/wqflask/utility/helper_functions.py
@@ -0,0 +1,15 @@
+from __future__ import absolute_import, print_function, division
+
+from base.trait import GeneralTrait
+from base import data_set
+
+def get_dataset_and_trait(self, start_vars):
+ #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype"
+ self.dataset = data_set.create_dataset(start_vars['dataset'])
+ self.this_trait = GeneralTrait(dataset=self.dataset.name,
+ name=start_vars['trait_id'],
+ cellid=None)
+
+ #if read_genotype:
+ self.dataset.group.read_genotype_file()
+ self.genotype = self.dataset.group.genotype
\ No newline at end of file
diff --git a/wqflask/wqflask/correlation/CorrelationPage.py b/wqflask/wqflask/correlation/show_corr_results.py
index f1dd96ef..23dd1534 100644
--- a/wqflask/wqflask/correlation/CorrelationPage.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -27,7 +27,7 @@
# Refactored correlation calculation into smaller functions in preparation of
# separating html from existing code
-from __future__ import print_function
+from __future__ import absolute_import, print_function, division
import string
from math import *
@@ -47,12 +47,12 @@ from base import webqtlConfig
from utility.THCell import THCell
from utility.TDCell import TDCell
from base.webqtlTrait import GeneralTrait
-from base.data_set import create_dataset
+from base import data_set
from base.templatePage import templatePage
-from utility import webqtlUtil
+from utility import webqtlUtil, helper_functions
from dbFunction import webqtlDatabaseFunction
import utility.webqtlUtil #this is for parallel computing only.
-import correlationFunction
+from wqflask.correlation import correlationFunction
METHOD_SAMPLE_PEARSON = "1"
@@ -119,6 +119,7 @@ class Trait(object):
if abs(self.correlation) >= 1.0:
self.p_value = 0.0
else:
+ #Confirm that this division works after future import
ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation))
ZValue = ZValue*sqrt(self.overlap-3)
self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue)))
@@ -128,10 +129,9 @@ class Trait(object):
#XZ, 01/14/2009: This method is for parallel computing only.
#XZ: It is supposed to be called when "Genetic Correlation, Pearson's r" (method 1)
#XZ: or "Genetic Correlation, Spearman's rho" (method 2) is selected
-def compute_corr( input_nnCorr, input_trait, input_list, computing_method):
+def compute_corr(input_nnCorr, input_trait, input_list, computing_method):
allcorrelations = []
-
for line in input_list:
tokens = line.split('","')
tokens[-1] = tokens[-1][:-2] #remove the last "
@@ -257,11 +257,11 @@ def auth_user_for_db(db, cursor, target_db_name, privilege, username):
raise AuthException("The %s database you selected is not open to the public at this time, please go back and select other database." % indFullName)
-class CorrelationPage(templatePage):
+class CorrelationResults(object):
corr_min_informative = 4
- PAGE_HEADING = "Correlation Table"
+ #PAGE_HEADING = "Correlation Table"
#CORRELATION_METHODS = {"1" : "Genetic Correlation (Pearson's r)",
# "2" : "Genetic Correlation (Spearman's rho)",
# "3" : "SGO Literature Correlation",
@@ -271,98 +271,110 @@ class CorrelationPage(templatePage):
#RANK_ORDERS = {"1": 0, "2": 1, "3": 0, "4": 0, "5": 1}
- def error(self, message, *args, **kw):
- heading = heading or self.PAGE_HEADING
- return templatePage.error(heading = heading, detail = [message], error=error)
-
- def __init__(self, fd):
- #print("in CorrelationPage __init__ fd is:", pf(fd.__dict__))
- # Call the superclass constructor
-
- # Put everything in fd into self
- self.__dict__.update(fd.__dict__)
-
- templatePage.__init__(self, fd)
-
- #print("in CorrelationPage __init__ now fd is:", pf(fd.__dict__))
- # Connect to the database
- if not self.openMysql():
- return
-
- # Read the genotype from a file
- if not fd.genotype:
- fd.readGenotype()
-
- sample_list = get_sample_data(fd)
- print("sample_list is", pf(sample_list))
-
- # Whether the user chose BXD Only, Non-BXD Only, or All Strains
- # (replace BXD with whatever the group/inbredset name is)
- # "mdp" stands for "mouse diversity panel" This is outdated; it now represents any
- # cases/strains from the non-primary group
- mdp_choice = fd.MDPChoice if fd.allstrainlist else None
-
- self.species = get_species(fd, self.cursor)
+ #def error(self, message, *args, **kw):
+ # heading = heading or self.PAGE_HEADING
+ # return templatePage.error(heading = heading, detail = [message], error=error)
+
+ def __init__(self, start_vars):
+ #self.dataset = create_dataset(start_vars['dataset_name'])
+ #self.dataset.group.read_genotype_file()
+ #self.genotype = self.dataset.group.genotype
+ #
+ #self.this_trait = GeneralTrait(dataset=self.dataset.name,
+ # name=start_vars['trait_id'],
+ # cellid=None)
+
+ helper_functions.get_dataset_and_trait(self, start_vars)
+
+ self.samples = [] # Want only ones with values
+ self.vals = []
+ self.variances = []
+
+ corr_samples_group = start_vars['corr_samples_group']
+ if corr_samples_group != 'samples_other':
+ self.process_samples(start_vars, self.dataset.group.samplelist, ())
+ #for sample in self.dataset.group.samplelist:
+ # value = start_vars['value:' + sample]
+ # variance = start_vars['variance:' + sample]
+ # if variance.strip().lower() == 'x':
+ # variance = 0
+ # else:
+ # variance = float(variance)
+ # if value.strip().lower() != 'x':
+ # self.samples.append(str(sample))
+ # self.vals.append(float(value))
+ # self.variances.append(variance)
+
+ if corr_samples_group != 'samples_primary':
+ primary_samples = (self.dataset.group.parlist +
+ self.dataset.group.f1list +
+ self.dataset.group.samplelist)
+ self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples)
+ #for sample in self.this_trait.data.keys():
+ # if sample not in primary_samples:
+ # value = start_vars['value:' + sample]
+ # variance = start_vars['variance:' + sample]
+ # if variance.strip().lower() == 'x':
+ # variance = 0
+ # else:
+ # variance = float(variance)
+ # if value.strip().lower() != 'x':
+ # self.samples.append(str(sample))
+ # self.vals.append(float(value))
+ # self.variances.append(variance)
+
+ print("self.samples is:", pf(self.samples))
+
+ #sample_list = get_sample_data(fd)
+ #print("sample_list is", pf(sample_list))
#XZ, 09/18/2008: get all information about the user selected database.
#target_db_name = fd.corr_dataset
- self.target_db_name = fd.corr_dataset
+ self.target_db_name = start_vars['corr_dataset']
+ # Zach said this is ok
+ # Auth if needed
#try:
- #print("target_db_name is:", target_db_name)
- self.db = create_dataset(self.db_conn, self.target_db_name)
- #except:
- # detail = ["The database you just requested has not been established yet."]
- # self.error(detail)
- # return
-
- # Auth if needed
- try:
- auth_user_for_db(self.db, self.cursor, self.target_db_name, self.privilege, self.userName)
- except AuthException as e:
- detail = [e.message]
- return self.error(detail)
+ # auth_user_for_db(self.db, self.cursor, self.target_db_name, self.privilege, self.userName)
+ #except AuthException as e:
+ # detail = [e.message]
+ # return self.error(detail)
#XZ, 09/18/2008: filter out the strains that have no value.
- self.sample_names, vals, vars, N = fd.informativeStrains(sample_list)
+ #self.sample_names, vals, vars, N = fd.informativeStrains(sample_list)
- print("samplenames is:", pf(self.sample_names))
+ #print("samplenames is:", pf(self.sample_names))
#CF - If less than a minimum number of strains/cases in common, don't calculate anything
- if len(self.sample_names) < self.corr_min_informative:
- detail = ['Fewer than %d strain data were entered for %s data set. No calculation of correlation has been attempted.' % (self.corr_min_informative, fd.RISet)]
- self.error(heading=None, detail=detail)
-
- for key, value in self.__dict__.items():
- if key.startswith("corr"):
- print("[red] %s - %s" % (key, value))
+ #if len(self.sample_names) < self.corr_min_informative:
+ # detail = ['Fewer than %d strain data were entered for %s data set. No calculation of correlation has been attempted.' % (self.corr_min_informative, fd.RISet)]
+ # self.error(heading=None, detail=detail)
#correlation_method = self.CORRELATION_METHODS[self.method]
#rankOrder = self.RANK_ORDERS[self.method]
# CF - Number of results returned
# Todo: Get rid of self.returnNumber
- self.returnNumber = self.corr_return_results
- self.record_count = 0
+ #self.record_count = 0
- myTrait = get_custom_trait(fd, self.cursor)
+ #myTrait = get_custom_trait(fd, self.cursor)
# We will not get Literature Correlations if there is no GeneId because there is nothing
# to look against
- self.gene_id = int(fd.GeneId)
+ self.geneid = self.this_trait.geneid
# We will not get Tissue Correlations if there is no gene symbol because there is nothing to look against
- self.trait_symbol = myTrait.symbol
+ #self.trait_symbol = myTrait.symbol
#XZ, 12/12/2008: if the species is rat or human, translate the geneid to mouse geneid
- self.input_trait_mouse_gene_id = self.translateToMouseGeneID(self.species, self.gene_id)
+ self.input_trait_mouse_gene_id = self.translateToMouseGeneID(self.dataset.group.species, self.geneid)
#XZ: As of Nov/13/2010, this dataset is 'UTHSC Illumina V6.2 RankInv B6 D2 average CNS GI average (May 08)'
self.tissue_probeset_freeze_id = 1
- traitList = self.correlate(vals)
+ traitList = self.correlate(self.vals)
_log.info("Done doing correlation calculation")
@@ -741,13 +753,19 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php');
else:
self.dict['body'] = ""
-
-#############################
-# #
-# CorrelationPage Functions #
-# #
-#############################
-
+ def process_samples(self, start_vars, sample_names, excluded_samples):
+ for sample in sample_names:
+ if sample not in excluded_samples:
+ value = start_vars['value:' + sample]
+ variance = start_vars['variance:' + sample]
+ if variance.strip().lower() == 'x':
+ variance = 0
+ else:
+ variance = float(variance)
+ if value.strip().lower() != 'x':
+ self.samples.append(str(sample))
+ self.vals.append(float(value))
+ self.variances.append(variance)
def getSortByValue(self, calculationMethod):
@@ -805,8 +823,7 @@ Resorting this table <br>
"""Returns the name of the reference database file with which correlations are calculated.
Takes argument cursor which is a cursor object of any instance of a subclass of templatePage
Used by correlationPage"""
-
- query = 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name
+ROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name
self.cursor.execute(query)
result = self.cursor.fetchone()
Id = result[0]
@@ -817,6 +834,7 @@ Resorting this table <br>
FileName = 'ProbeSetFreezeId_' + str(Id) + '_FullName_' + FullName + '.txt'
return FileName
+ query = 'SELECT Id, FullName F
#XZ, 01/29/2009: I modified this function.
@@ -835,26 +853,32 @@ Resorting this table <br>
#XZ, 12/12/2008: if the species is rat or human, translate the geneid to mouse geneid
#XZ, 12/12/2008: if the input geneid is 'None', return 0
#XZ, 12/12/2008: if the input geneid has no corresponding mouse geneid, return 0
- def translateToMouseGeneID (self, species, geneid):
- mouse_geneid = 0;
+ def translateToMouseGeneID(self, species, geneid):
+ #mouse_geneid = 0
- #if input geneid is None, return 0.
if not geneid:
- return mouse_geneid
+ return 0
+
+ #self.id, self.name, self.fullname, self.shortname = g.db.execute("""
+ # SELECT Id, Name, FullName, ShortName
+ # FROM %s
+ # WHERE public > %s AND
+ # (Name = '%s' OR FullName = '%s' OR ShortName = '%s')
+ # """ % (query_args)).fetchone()
if species == 'mouse':
mouse_geneid = geneid
elif species == 'rat':
- self.cursor.execute( "SELECT mouse FROM GeneIDXRef WHERE rat=%d" % int(geneid) )
- record = self.cursor.fetchone()
- if record:
- mouse_geneid = record[0]
+ mouse_geneid = g.db.execute(
+ """SELECT mouse FROM GeneIDXRef WHERE rat='%d'""", int(geneid)).fetchone().mouse
+ #if record:
+ # mouse_geneid = record[0]
elif species == 'human':
- self.cursor.execute( "SELECT mouse FROM GeneIDXRef WHERE human=%d" % int(geneid) )
- record = self.cursor.fetchone()
- if record:
- mouse_geneid = record[0]
-
+ mouse_geneid = g.db.execute(
+ """SELECT mouse FROM GeneIDXRef WHERE human='%d'""", int(geneid)).fetchone().mouse
+ #if record:
+ # mouse_geneid = record[0]
+ print("mouse_geneid:", mouse_geneid)
return mouse_geneid
@@ -880,7 +904,6 @@ Resorting this table <br>
except: return False
-
def fetchAllDatabaseData(self, species, GeneId, GeneSymbol, strains, db, method, returnNumber, tissueProbeSetFreezeId):
StrainIds = []
@@ -1181,9 +1204,10 @@ Resorting this table <br>
return traitList
- def get_trait(self, cached, vals):
+ def get_traits(self, vals):
- if cached:
+ #Todo: Redo cached stuff using memcached
+ if False:
_log.info("Using the fast method because the file exists")
lit_corrs = {}
tissue_corrs = {}
@@ -1235,14 +1259,14 @@ Resorting this table <br>
return traits, new_vals
else:
- _log.info("Using the slow method for correlation")
-
- _log.info("Fetching from database")
+ #_log.info("Using the slow method for correlation")
+ #
+ #_log.info("Fetching from database")
traits = self.fetchAllDatabaseData(species=self.species, GeneId=self.gene_id, GeneSymbol=self.trait_symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id)
- _log.info("Done fetching from database")
+ #_log.info("Done fetching from database")
totalTraits = len(traits) #XZ, 09/18/2008: total trait number
- return traits, vals
+ return traits
def do_parallel_correlation(self):
@@ -1302,17 +1326,17 @@ Resorting this table <br>
_log.info("Done correlating using the fast method")
- def correlate(self, vals):
+ def correlate(self):
correlations = []
#XZ: Use the fast method only for probeset dataset, and this dataset must have been created.
#XZ: Otherwise, use original method
- _log.info("Entering correlation")
+ #_log.info("Entering correlation")
- db_filename = self.getFileName( target_db_name=self.target_db_name )
-
- cache_available = db_filename in os.listdir(webqtlConfig.TEXTDIR)
+ #db_filename = self.getFileName(target_db_name=self.target_db_name)
+ #
+ #cache_available = db_filename in os.listdir(webqtlConfig.TEXTDIR)
# If the cache file exists, do a cached correlation for probeset data
if self.db.type == "ProbeSet":
@@ -1321,7 +1345,7 @@ Resorting this table <br>
#
# else:
- (traits, vals) = self.get_trait(cache_available, vals)
+ traits = self.get_traits(self.vals)
for trait in traits:
trait.calculate_correlation(vals, self.method)
@@ -2080,3 +2104,4 @@ Resorting this table <br>
newrow += 1
return tblobj_body, worksheet, corrScript
+
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 374e7c95..7cdc350f 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -19,9 +19,9 @@ from htmlgen import HTMLgen2 as HT
from utility import Plot, Bunch
from wqflask.interval_analyst import GeneUtil
from base.trait import GeneralTrait
-from base.data_set import create_dataset
+from base import data_set
from base.templatePage import templatePage
-from utility import webqtlUtil
+from utility import webqtlUtil, helper_functions
from base import webqtlConfig
from dbFunction import webqtlDatabaseFunction
from base.GeneralObject import GeneralObject
@@ -54,10 +54,8 @@ class MarkerRegression(object):
#print("start_vars are: ", pf(start_vars))
- self.dataset = create_dataset(start_vars['dataset_name'])
- self.this_trait = GeneralTrait(dataset=self.dataset.name,
- name=start_vars['trait_id'],
- cellid=None)
+ helper_functions.get_dataset_and_trait(self, start_vars)
+
self.num_perm = int(start_vars['num_perm'])
# Passed in by the form (user might have edited)
@@ -67,9 +65,6 @@ class MarkerRegression(object):
self.vals = []
self.variances = []
- self.dataset.group.read_genotype_file()
- self.genotype = self.dataset.group.genotype
-
assert start_vars['display_all_lrs'] in ('True', 'False')
self.display_all_lrs = True if start_vars['display_all_lrs'] == 'True' else False
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 9bd45905..603c40f5 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -14,9 +14,9 @@ from htmlgen import HTMLgen2 as HT
from base import webqtlConfig
from base import webqtlCaseData
from wqflask.show_trait.SampleList import SampleList
-from utility import webqtlUtil, Plot, Bunch
+from utility import webqtlUtil, Plot, Bunch, helper_functions
from base.trait import GeneralTrait
-from base.data_set import create_dataset
+from base import data_set
from dbFunction import webqtlDatabaseFunction
from base.templatePage import templatePage
from basicStatistics import BasicStatisticsFunctions
@@ -38,17 +38,19 @@ class ShowTrait(object):
print("in ShowTrait, kw are:", kw)
self.trait_id = kw['trait_id']
- self.dataset = create_dataset(kw['dataset'])
+ helper_functions.get_dataset_and_trait(self, kw)
- #self.cell_id = None
-
-
- this_trait = GeneralTrait(dataset=self.dataset.name,
- name=self.trait_id,
- cellid=None)
-
-
- self.dataset.group.read_genotype_file()
+ #self.dataset = create_dataset(kw['dataset'])
+ #
+ ##self.cell_id = None
+ #
+ #
+ #this_trait = GeneralTrait(dataset=self.dataset.name,
+ # name=self.trait_id,
+ # cellid=None)
+ #
+ #
+ #self.dataset.group.read_genotype_file()
if not self.dataset.group.genotype:
self.read_data(include_f1=True)
@@ -101,23 +103,22 @@ class ShowTrait(object):
#hddn['mappingMethodId'] = webqtlDatabaseFunction.getMappingMethod (cursor=self.cursor,
# groupName=fd.group)
- self.dispTraitInformation(kw, "", hddn, this_trait) #Display trait information + function buttons
+ self.dispTraitInformation(kw, "", hddn, self.this_trait) #Display trait information + function buttons
#if this_trait == None:
# this_trait = webqtlTrait(data=kw['allTraitData'], dataset=None)
- self.build_correlation_tools(this_trait)
+ self.build_correlation_tools(self.this_trait)
- self.make_sample_lists(this_trait)
+ self.make_sample_lists(self.this_trait)
if self.dataset.group.allsamples:
hddn['allsamples'] = string.join(self.dataset.group.allsamples, ' ')
hddn['trait_id'] = self.trait_id
- hddn['dataset_name'] = self.dataset.name
+ hddn['dataset'] = self.dataset.name
# We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
- self.this_trait = this_trait
self.hddn = hddn
self.sample_group_types = OrderedDict()
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index c9659a83..472548f0 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -19,7 +19,7 @@ from wqflask import search_results
from wqflask.show_trait import show_trait
from wqflask.show_trait import export_trait_data
from wqflask.marker_regression import marker_regression
-from wqflask.correlation import CorrelationPage
+from wqflask.correlation import show_corr_results
from wqflask.dataSharing import SharingInfo, SharingInfoPage
@@ -161,8 +161,8 @@ def marker_regression_page():
@app.route("/corr_compute", methods=('POST',))
def corr_compute_page():
print("In corr_compute, request.args is:", pf(request.form))
- fd = webqtlFormData.webqtlFormData(request.form)
- template_vars = CorrelationPage.CorrelationPage(fd)
+ #fd = webqtlFormData.webqtlFormData(request.form)
+ template_vars = show_corr_results.CorrelationResults(request.form)
return render_template("correlation_page.html", **template_vars.__dict__)
@app.route("/int_mapping", methods=('POST',))