diff options
Diffstat (limited to 'wqflask')
-rwxr-xr-x | wqflask/base/data_set.py | 1 | ||||
-rwxr-xr-x | wqflask/base/trait.py | 42 | ||||
-rw-r--r-- | wqflask/utility/helper_functions.py | 15 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/show_corr_results.py (renamed from wqflask/wqflask/correlation/CorrelationPage.py) | 237 | ||||
-rwxr-xr-x | wqflask/wqflask/marker_regression/marker_regression.py | 13 | ||||
-rwxr-xr-x | wqflask/wqflask/show_trait/show_trait.py | 35 | ||||
-rw-r--r-- | wqflask/wqflask/views.py | 6 |
7 files changed, 193 insertions, 156 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 50ef8f57..7088913c 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -741,3 +741,4 @@ def geno_mrna_confidentiality(ob): if confidential: # Allow confidential data later NoConfindetialDataForYouTodaySorry + diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 241bf2ab..2af4bc24 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -314,27 +314,27 @@ class GeneralTrait: #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number. #XZ: So I have to test if geneid is number before execute the query. #XZ: The geneid values in database should be cleaned up. - try: - junk = float(self.geneid) - geneidIsNumber = 1 - except: - geneidIsNumber = 0 - - if geneidIsNumber: - query = """ - SELECT - HomologeneId - FROM - Homologene, Species, InbredSet - WHERE - Homologene.GeneId =%s AND - InbredSet.Name = '%s' AND - InbredSet.SpeciesId = Species.Id AND - Species.TaxonomyId = Homologene.TaxonomyId - """ % (escape(str(self.geneid)), escape(self.dataset.group.name)) - result = g.db.execute(query).fetchone() - else: - result = None + #try: + # float(self.geneid) + # geneidIsNumber = True + #except ValueError: + # geneidIsNumber = False + + #if geneidIsNumber: + query = """ + SELECT + HomologeneId + FROM + Homologene, Species, InbredSet + WHERE + Homologene.GeneId =%s AND + InbredSet.Name = '%s' AND + InbredSet.SpeciesId = Species.Id AND + Species.TaxonomyId = Homologene.TaxonomyId + """ % (escape(str(self.geneid)), escape(self.dataset.group.name)) + result = g.db.execute(query).fetchone() + #else: + # result = None if result: self.homologeneid = result[0] diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py new file mode 100644 index 00000000..920d9ac6 --- /dev/null +++ b/wqflask/utility/helper_functions.py @@ -0,0 +1,15 @@ +from __future__ import absolute_import, print_function, division + +from base.trait import GeneralTrait +from base import data_set + +def get_dataset_and_trait(self, start_vars): + #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" + self.dataset = data_set.create_dataset(start_vars['dataset']) + self.this_trait = GeneralTrait(dataset=self.dataset.name, + name=start_vars['trait_id'], + cellid=None) + + #if read_genotype: + self.dataset.group.read_genotype_file() + self.genotype = self.dataset.group.genotype
\ No newline at end of file diff --git a/wqflask/wqflask/correlation/CorrelationPage.py b/wqflask/wqflask/correlation/show_corr_results.py index f1dd96ef..23dd1534 100644 --- a/wqflask/wqflask/correlation/CorrelationPage.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -27,7 +27,7 @@ # Refactored correlation calculation into smaller functions in preparation of # separating html from existing code -from __future__ import print_function +from __future__ import absolute_import, print_function, division import string from math import * @@ -47,12 +47,12 @@ from base import webqtlConfig from utility.THCell import THCell from utility.TDCell import TDCell from base.webqtlTrait import GeneralTrait -from base.data_set import create_dataset +from base import data_set from base.templatePage import templatePage -from utility import webqtlUtil +from utility import webqtlUtil, helper_functions from dbFunction import webqtlDatabaseFunction import utility.webqtlUtil #this is for parallel computing only. -import correlationFunction +from wqflask.correlation import correlationFunction METHOD_SAMPLE_PEARSON = "1" @@ -119,6 +119,7 @@ class Trait(object): if abs(self.correlation) >= 1.0: self.p_value = 0.0 else: + #Confirm that this division works after future import ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation)) ZValue = ZValue*sqrt(self.overlap-3) self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue))) @@ -128,10 +129,9 @@ class Trait(object): #XZ, 01/14/2009: This method is for parallel computing only. #XZ: It is supposed to be called when "Genetic Correlation, Pearson's r" (method 1) #XZ: or "Genetic Correlation, Spearman's rho" (method 2) is selected -def compute_corr( input_nnCorr, input_trait, input_list, computing_method): +def compute_corr(input_nnCorr, input_trait, input_list, computing_method): allcorrelations = [] - for line in input_list: tokens = line.split('","') tokens[-1] = tokens[-1][:-2] #remove the last " @@ -257,11 +257,11 @@ def auth_user_for_db(db, cursor, target_db_name, privilege, username): raise AuthException("The %s database you selected is not open to the public at this time, please go back and select other database." % indFullName) -class CorrelationPage(templatePage): +class CorrelationResults(object): corr_min_informative = 4 - PAGE_HEADING = "Correlation Table" + #PAGE_HEADING = "Correlation Table" #CORRELATION_METHODS = {"1" : "Genetic Correlation (Pearson's r)", # "2" : "Genetic Correlation (Spearman's rho)", # "3" : "SGO Literature Correlation", @@ -271,98 +271,110 @@ class CorrelationPage(templatePage): #RANK_ORDERS = {"1": 0, "2": 1, "3": 0, "4": 0, "5": 1} - def error(self, message, *args, **kw): - heading = heading or self.PAGE_HEADING - return templatePage.error(heading = heading, detail = [message], error=error) - - def __init__(self, fd): - #print("in CorrelationPage __init__ fd is:", pf(fd.__dict__)) - # Call the superclass constructor - - # Put everything in fd into self - self.__dict__.update(fd.__dict__) - - templatePage.__init__(self, fd) - - #print("in CorrelationPage __init__ now fd is:", pf(fd.__dict__)) - # Connect to the database - if not self.openMysql(): - return - - # Read the genotype from a file - if not fd.genotype: - fd.readGenotype() - - sample_list = get_sample_data(fd) - print("sample_list is", pf(sample_list)) - - # Whether the user chose BXD Only, Non-BXD Only, or All Strains - # (replace BXD with whatever the group/inbredset name is) - # "mdp" stands for "mouse diversity panel" This is outdated; it now represents any - # cases/strains from the non-primary group - mdp_choice = fd.MDPChoice if fd.allstrainlist else None - - self.species = get_species(fd, self.cursor) + #def error(self, message, *args, **kw): + # heading = heading or self.PAGE_HEADING + # return templatePage.error(heading = heading, detail = [message], error=error) + + def __init__(self, start_vars): + #self.dataset = create_dataset(start_vars['dataset_name']) + #self.dataset.group.read_genotype_file() + #self.genotype = self.dataset.group.genotype + # + #self.this_trait = GeneralTrait(dataset=self.dataset.name, + # name=start_vars['trait_id'], + # cellid=None) + + helper_functions.get_dataset_and_trait(self, start_vars) + + self.samples = [] # Want only ones with values + self.vals = [] + self.variances = [] + + corr_samples_group = start_vars['corr_samples_group'] + if corr_samples_group != 'samples_other': + self.process_samples(start_vars, self.dataset.group.samplelist, ()) + #for sample in self.dataset.group.samplelist: + # value = start_vars['value:' + sample] + # variance = start_vars['variance:' + sample] + # if variance.strip().lower() == 'x': + # variance = 0 + # else: + # variance = float(variance) + # if value.strip().lower() != 'x': + # self.samples.append(str(sample)) + # self.vals.append(float(value)) + # self.variances.append(variance) + + if corr_samples_group != 'samples_primary': + primary_samples = (self.dataset.group.parlist + + self.dataset.group.f1list + + self.dataset.group.samplelist) + self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) + #for sample in self.this_trait.data.keys(): + # if sample not in primary_samples: + # value = start_vars['value:' + sample] + # variance = start_vars['variance:' + sample] + # if variance.strip().lower() == 'x': + # variance = 0 + # else: + # variance = float(variance) + # if value.strip().lower() != 'x': + # self.samples.append(str(sample)) + # self.vals.append(float(value)) + # self.variances.append(variance) + + print("self.samples is:", pf(self.samples)) + + #sample_list = get_sample_data(fd) + #print("sample_list is", pf(sample_list)) #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset - self.target_db_name = fd.corr_dataset + self.target_db_name = start_vars['corr_dataset'] + # Zach said this is ok + # Auth if needed #try: - #print("target_db_name is:", target_db_name) - self.db = create_dataset(self.db_conn, self.target_db_name) - #except: - # detail = ["The database you just requested has not been established yet."] - # self.error(detail) - # return - - # Auth if needed - try: - auth_user_for_db(self.db, self.cursor, self.target_db_name, self.privilege, self.userName) - except AuthException as e: - detail = [e.message] - return self.error(detail) + # auth_user_for_db(self.db, self.cursor, self.target_db_name, self.privilege, self.userName) + #except AuthException as e: + # detail = [e.message] + # return self.error(detail) #XZ, 09/18/2008: filter out the strains that have no value. - self.sample_names, vals, vars, N = fd.informativeStrains(sample_list) + #self.sample_names, vals, vars, N = fd.informativeStrains(sample_list) - print("samplenames is:", pf(self.sample_names)) + #print("samplenames is:", pf(self.sample_names)) #CF - If less than a minimum number of strains/cases in common, don't calculate anything - if len(self.sample_names) < self.corr_min_informative: - detail = ['Fewer than %d strain data were entered for %s data set. No calculation of correlation has been attempted.' % (self.corr_min_informative, fd.RISet)] - self.error(heading=None, detail=detail) - - for key, value in self.__dict__.items(): - if key.startswith("corr"): - print("[red] %s - %s" % (key, value)) + #if len(self.sample_names) < self.corr_min_informative: + # detail = ['Fewer than %d strain data were entered for %s data set. No calculation of correlation has been attempted.' % (self.corr_min_informative, fd.RISet)] + # self.error(heading=None, detail=detail) #correlation_method = self.CORRELATION_METHODS[self.method] #rankOrder = self.RANK_ORDERS[self.method] # CF - Number of results returned # Todo: Get rid of self.returnNumber - self.returnNumber = self.corr_return_results - self.record_count = 0 + #self.record_count = 0 - myTrait = get_custom_trait(fd, self.cursor) + #myTrait = get_custom_trait(fd, self.cursor) # We will not get Literature Correlations if there is no GeneId because there is nothing # to look against - self.gene_id = int(fd.GeneId) + self.geneid = self.this_trait.geneid # We will not get Tissue Correlations if there is no gene symbol because there is nothing to look against - self.trait_symbol = myTrait.symbol + #self.trait_symbol = myTrait.symbol #XZ, 12/12/2008: if the species is rat or human, translate the geneid to mouse geneid - self.input_trait_mouse_gene_id = self.translateToMouseGeneID(self.species, self.gene_id) + self.input_trait_mouse_gene_id = self.translateToMouseGeneID(self.dataset.group.species, self.geneid) #XZ: As of Nov/13/2010, this dataset is 'UTHSC Illumina V6.2 RankInv B6 D2 average CNS GI average (May 08)' self.tissue_probeset_freeze_id = 1 - traitList = self.correlate(vals) + traitList = self.correlate(self.vals) _log.info("Done doing correlation calculation") @@ -741,13 +753,19 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php'); else: self.dict['body'] = "" - -############################# -# # -# CorrelationPage Functions # -# # -############################# - + def process_samples(self, start_vars, sample_names, excluded_samples): + for sample in sample_names: + if sample not in excluded_samples: + value = start_vars['value:' + sample] + variance = start_vars['variance:' + sample] + if variance.strip().lower() == 'x': + variance = 0 + else: + variance = float(variance) + if value.strip().lower() != 'x': + self.samples.append(str(sample)) + self.vals.append(float(value)) + self.variances.append(variance) def getSortByValue(self, calculationMethod): @@ -805,8 +823,7 @@ Resorting this table <br> """Returns the name of the reference database file with which correlations are calculated. Takes argument cursor which is a cursor object of any instance of a subclass of templatePage Used by correlationPage""" - - query = 'SELECT Id, FullName FROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name +ROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name self.cursor.execute(query) result = self.cursor.fetchone() Id = result[0] @@ -817,6 +834,7 @@ Resorting this table <br> FileName = 'ProbeSetFreezeId_' + str(Id) + '_FullName_' + FullName + '.txt' return FileName + query = 'SELECT Id, FullName F #XZ, 01/29/2009: I modified this function. @@ -835,26 +853,32 @@ Resorting this table <br> #XZ, 12/12/2008: if the species is rat or human, translate the geneid to mouse geneid #XZ, 12/12/2008: if the input geneid is 'None', return 0 #XZ, 12/12/2008: if the input geneid has no corresponding mouse geneid, return 0 - def translateToMouseGeneID (self, species, geneid): - mouse_geneid = 0; + def translateToMouseGeneID(self, species, geneid): + #mouse_geneid = 0 - #if input geneid is None, return 0. if not geneid: - return mouse_geneid + return 0 + + #self.id, self.name, self.fullname, self.shortname = g.db.execute(""" + # SELECT Id, Name, FullName, ShortName + # FROM %s + # WHERE public > %s AND + # (Name = '%s' OR FullName = '%s' OR ShortName = '%s') + # """ % (query_args)).fetchone() if species == 'mouse': mouse_geneid = geneid elif species == 'rat': - self.cursor.execute( "SELECT mouse FROM GeneIDXRef WHERE rat=%d" % int(geneid) ) - record = self.cursor.fetchone() - if record: - mouse_geneid = record[0] + mouse_geneid = g.db.execute( + """SELECT mouse FROM GeneIDXRef WHERE rat='%d'""", int(geneid)).fetchone().mouse + #if record: + # mouse_geneid = record[0] elif species == 'human': - self.cursor.execute( "SELECT mouse FROM GeneIDXRef WHERE human=%d" % int(geneid) ) - record = self.cursor.fetchone() - if record: - mouse_geneid = record[0] - + mouse_geneid = g.db.execute( + """SELECT mouse FROM GeneIDXRef WHERE human='%d'""", int(geneid)).fetchone().mouse + #if record: + # mouse_geneid = record[0] + print("mouse_geneid:", mouse_geneid) return mouse_geneid @@ -880,7 +904,6 @@ Resorting this table <br> except: return False - def fetchAllDatabaseData(self, species, GeneId, GeneSymbol, strains, db, method, returnNumber, tissueProbeSetFreezeId): StrainIds = [] @@ -1181,9 +1204,10 @@ Resorting this table <br> return traitList - def get_trait(self, cached, vals): + def get_traits(self, vals): - if cached: + #Todo: Redo cached stuff using memcached + if False: _log.info("Using the fast method because the file exists") lit_corrs = {} tissue_corrs = {} @@ -1235,14 +1259,14 @@ Resorting this table <br> return traits, new_vals else: - _log.info("Using the slow method for correlation") - - _log.info("Fetching from database") + #_log.info("Using the slow method for correlation") + # + #_log.info("Fetching from database") traits = self.fetchAllDatabaseData(species=self.species, GeneId=self.gene_id, GeneSymbol=self.trait_symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id) - _log.info("Done fetching from database") + #_log.info("Done fetching from database") totalTraits = len(traits) #XZ, 09/18/2008: total trait number - return traits, vals + return traits def do_parallel_correlation(self): @@ -1302,17 +1326,17 @@ Resorting this table <br> _log.info("Done correlating using the fast method") - def correlate(self, vals): + def correlate(self): correlations = [] #XZ: Use the fast method only for probeset dataset, and this dataset must have been created. #XZ: Otherwise, use original method - _log.info("Entering correlation") + #_log.info("Entering correlation") - db_filename = self.getFileName( target_db_name=self.target_db_name ) - - cache_available = db_filename in os.listdir(webqtlConfig.TEXTDIR) + #db_filename = self.getFileName(target_db_name=self.target_db_name) + # + #cache_available = db_filename in os.listdir(webqtlConfig.TEXTDIR) # If the cache file exists, do a cached correlation for probeset data if self.db.type == "ProbeSet": @@ -1321,7 +1345,7 @@ Resorting this table <br> # # else: - (traits, vals) = self.get_trait(cache_available, vals) + traits = self.get_traits(self.vals) for trait in traits: trait.calculate_correlation(vals, self.method) @@ -2080,3 +2104,4 @@ Resorting this table <br> newrow += 1 return tblobj_body, worksheet, corrScript + diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 374e7c95..7cdc350f 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -19,9 +19,9 @@ from htmlgen import HTMLgen2 as HT from utility import Plot, Bunch from wqflask.interval_analyst import GeneUtil from base.trait import GeneralTrait -from base.data_set import create_dataset +from base import data_set from base.templatePage import templatePage -from utility import webqtlUtil +from utility import webqtlUtil, helper_functions from base import webqtlConfig from dbFunction import webqtlDatabaseFunction from base.GeneralObject import GeneralObject @@ -54,10 +54,8 @@ class MarkerRegression(object): #print("start_vars are: ", pf(start_vars)) - self.dataset = create_dataset(start_vars['dataset_name']) - self.this_trait = GeneralTrait(dataset=self.dataset.name, - name=start_vars['trait_id'], - cellid=None) + helper_functions.get_dataset_and_trait(self, start_vars) + self.num_perm = int(start_vars['num_perm']) # Passed in by the form (user might have edited) @@ -67,9 +65,6 @@ class MarkerRegression(object): self.vals = [] self.variances = [] - self.dataset.group.read_genotype_file() - self.genotype = self.dataset.group.genotype - assert start_vars['display_all_lrs'] in ('True', 'False') self.display_all_lrs = True if start_vars['display_all_lrs'] == 'True' else False diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 9bd45905..603c40f5 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -14,9 +14,9 @@ from htmlgen import HTMLgen2 as HT from base import webqtlConfig from base import webqtlCaseData from wqflask.show_trait.SampleList import SampleList -from utility import webqtlUtil, Plot, Bunch +from utility import webqtlUtil, Plot, Bunch, helper_functions from base.trait import GeneralTrait -from base.data_set import create_dataset +from base import data_set from dbFunction import webqtlDatabaseFunction from base.templatePage import templatePage from basicStatistics import BasicStatisticsFunctions @@ -38,17 +38,19 @@ class ShowTrait(object): print("in ShowTrait, kw are:", kw) self.trait_id = kw['trait_id'] - self.dataset = create_dataset(kw['dataset']) + helper_functions.get_dataset_and_trait(self, kw) - #self.cell_id = None - - - this_trait = GeneralTrait(dataset=self.dataset.name, - name=self.trait_id, - cellid=None) - - - self.dataset.group.read_genotype_file() + #self.dataset = create_dataset(kw['dataset']) + # + ##self.cell_id = None + # + # + #this_trait = GeneralTrait(dataset=self.dataset.name, + # name=self.trait_id, + # cellid=None) + # + # + #self.dataset.group.read_genotype_file() if not self.dataset.group.genotype: self.read_data(include_f1=True) @@ -101,23 +103,22 @@ class ShowTrait(object): #hddn['mappingMethodId'] = webqtlDatabaseFunction.getMappingMethod (cursor=self.cursor, # groupName=fd.group) - self.dispTraitInformation(kw, "", hddn, this_trait) #Display trait information + function buttons + self.dispTraitInformation(kw, "", hddn, self.this_trait) #Display trait information + function buttons #if this_trait == None: # this_trait = webqtlTrait(data=kw['allTraitData'], dataset=None) - self.build_correlation_tools(this_trait) + self.build_correlation_tools(self.this_trait) - self.make_sample_lists(this_trait) + self.make_sample_lists(self.this_trait) if self.dataset.group.allsamples: hddn['allsamples'] = string.join(self.dataset.group.allsamples, ' ') hddn['trait_id'] = self.trait_id - hddn['dataset_name'] = self.dataset.name + hddn['dataset'] = self.dataset.name # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self - self.this_trait = this_trait self.hddn = hddn self.sample_group_types = OrderedDict() diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index c9659a83..472548f0 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -19,7 +19,7 @@ from wqflask import search_results from wqflask.show_trait import show_trait from wqflask.show_trait import export_trait_data from wqflask.marker_regression import marker_regression -from wqflask.correlation import CorrelationPage +from wqflask.correlation import show_corr_results from wqflask.dataSharing import SharingInfo, SharingInfoPage @@ -161,8 +161,8 @@ def marker_regression_page(): @app.route("/corr_compute", methods=('POST',)) def corr_compute_page(): print("In corr_compute, request.args is:", pf(request.form)) - fd = webqtlFormData.webqtlFormData(request.form) - template_vars = CorrelationPage.CorrelationPage(fd) + #fd = webqtlFormData.webqtlFormData(request.form) + template_vars = show_corr_results.CorrelationResults(request.form) return render_template("correlation_page.html", **template_vars.__dict__) @app.route("/int_mapping", methods=('POST',)) |