diff options
-rw-r--r-- | misc/notes.txt | 3 | ||||
-rwxr-xr-x | web/webqtl/intervalMapping/IntervalMappingPage.py | 2 | ||||
-rwxr-xr-x | wqflask/base/data_set.py | 34 | ||||
-rw-r--r-- | wqflask/maintenance/quick_search_table.py | 8 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/show_corr_results.py | 4 | ||||
-rwxr-xr-x | wqflask/wqflask/interval_mapping/interval_mapping.py | 312 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/data/geno_to_ped.py | 22 | ||||
-rw-r--r-- | wqflask/wqflask/templates/base.html | 4 | ||||
-rw-r--r-- | wqflask/wqflask/templates/correlation_page.html | 8 | ||||
-rw-r--r-- | wqflask/wqflask/templates/index_page.html | 2 | ||||
-rw-r--r-- | wqflask/wqflask/templates/marker_regression.html | 10 | ||||
-rw-r--r-- | wqflask/wqflask/templates/quick_search.html | 11 | ||||
-rw-r--r-- | wqflask/wqflask/templates/search_result_page.html | 10 | ||||
-rw-r--r-- | wqflask/wqflask/templates/show_trait.html | 11 | ||||
-rw-r--r-- | wqflask/wqflask/views.py | 45 |
15 files changed, 423 insertions, 63 deletions
diff --git a/misc/notes.txt b/misc/notes.txt index 91a0e67c..f6a2bb33 100644 --- a/misc/notes.txt +++ b/misc/notes.txt @@ -90,6 +90,9 @@ Reload web server: Run server: python runserver.py +Run sendmail.py +python send_mail.py + =========================================== UFW - default firewall confirguation tool for Ubuntu; eases iptables firewall configuration diff --git a/web/webqtl/intervalMapping/IntervalMappingPage.py b/web/webqtl/intervalMapping/IntervalMappingPage.py index 4bdf45ab..c3ef1cbd 100755 --- a/web/webqtl/intervalMapping/IntervalMappingPage.py +++ b/web/webqtl/intervalMapping/IntervalMappingPage.py @@ -2038,7 +2038,7 @@ class IntervalMappingPage(templatePage): qtlresult = self.genotype.regression(strains = _strains, trait = _vals) self.qtlresults.append(qtlresult) - + if not self.multipleInterval: if self.controlLocus and self.selectedChr > -1: self.genotype.chromosome = [self.genotype[self.selectedChr]] diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index beb62bd7..f25e7974 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -168,13 +168,13 @@ class Markers(object): for marker, p_value in itertools.izip(self.markers, p_values): marker['p_value'] = p_value - if marker['p_value'] == 0: - marker['lod_score'] = 0 - marker['lrs_value'] = 0 - else: - marker['lod_score'] = -math.log10(marker['p_value']) - #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values - marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + if math.isnan(marker['p_value']): + print("p_value is:", marker['p_value']) + marker['lod_score'] = -math.log10(marker['p_value']) + #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + + class HumanMarkers(Markers): @@ -189,6 +189,8 @@ class HumanMarkers(Markers): marker['name'] = splat[1] marker['Mb'] = float(splat[3]) / 1000000 self.markers.append(marker) + + #print("markers is: ", pf(self.markers)) def add_pvalues(self, p_values): @@ -315,12 +317,12 @@ class DatasetGroup(object): #determine default genotype object if self.incparentsf1 and genotype_1.type != "intercross": - genotype = genotype_2 + self.genotype = genotype_2 else: self.incparentsf1 = 0 - genotype = genotype_1 + self.genotype = genotype_1 - self.samplelist = list(genotype.prgy) + self.samplelist = list(self.genotype.prgy) #class DataSets(object): @@ -438,10 +440,12 @@ class DataSet(object): def get_trait_data(self, sample_list=None): if sample_list: - self.samplelist = sample_list + self.group.parlist + self.group.f1list + self.samplelist = sample_list else: - self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list - + self.samplelist = self.group.samplelist + + if (self.group.parlist + self.group.f1list) in self.samplelist: + self.samplelist += self.group.parlist + self.group.f1list query = """ SELECT Strain.Name, Strain.Id FROM Strain, Species @@ -501,8 +505,8 @@ class DataSet(object): and {}Freeze.Name = '{}' and {}.Id = {}XRef.{}Id order by {}.Id - """.format(*mescape(self.type, self.type, self.type, self.type, - self.name, dataset_type, self.type, self.type, dataset_type)) + """.format(*mescape(self.type, self.type, self.type, self.name, + dataset_type, self.type, dataset_type, dataset_type)) else: query += """ WHERE {}XRef.{}FreezeId = {}Freeze.Id diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index eef61857..f0075df0 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -13,8 +13,10 @@ each trait, its dataset, and several columns determined by its trait type (pheno from __future__ import absolute_import, division, print_function -#import sys -#sys.path.append("../../..") + # We do this here so we can use zach_settings +# Not to avoid other absoulte_imports +import sys +sys.path.append("../../..") import simplejson as json @@ -504,4 +506,4 @@ def main(): ProbeSetXRef.run() if __name__ == "__main__": - main()
\ No newline at end of file + main() diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index a5c80674..8f23165c 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -178,10 +178,10 @@ class CorrelationResults(object): trait_object.lit_corr = lit_corr_data[trait][1] self.correlation_results.append(trait_object) - if self.corr_type != "lit": + if self.corr_type != "lit" and self.dataset.type == "ProbeSet": self.do_lit_correlation_for_trait_list() - if self.corr_type != "tissue": + if self.corr_type != "tissue" and self.dataset.type == "ProbeSet": self.do_tissue_correlation_for_trait_list() #print("self.correlation_results: ", pf(self.correlation_results)) diff --git a/wqflask/wqflask/interval_mapping/interval_mapping.py b/wqflask/wqflask/interval_mapping/interval_mapping.py new file mode 100755 index 00000000..aca99cbe --- /dev/null +++ b/wqflask/wqflask/interval_mapping/interval_mapping.py @@ -0,0 +1,312 @@ +from __future__ import absolute_import, print_function, division + +from base.trait import GeneralTrait +from base import data_set #import create_dataset + +from pprint import pformat as pf + +import string +import sys +import os +import collections + +import numpy as np +from scipy import linalg +import rpy2.robjects + +import simplejson as json + +#from redis import Redis + + +from base.trait import GeneralTrait +from base import data_set +from base import species +from base import webqtlConfig +from wqflask.my_pylmm.data import prep_data +from wqflask.my_pylmm.pyLMM import lmm +from wqflask.my_pylmm.pyLMM import input +from utility import helper_functions +from utility import Plot, Bunch +from utility import temp_data + +from utility.benchmark import Bench + + +class IntervalMapping(object): + + def __init__(self, start_vars, temp_uuid): + + #Currently only getting trait data for one trait, but will need + #to change this to accept multiple traits once the collection page is implemented + helper_functions.get_species_dataset_trait(self, start_vars) + + tempdata = temp_data.TempData(temp_uuid) + + self.samples = [] # Want only ones with values + self.vals = [] + + for sample in self.dataset.group.samplelist: + value = start_vars['value:' + sample] + self.samples.append(str(sample)) + self.vals.append(value) + + self.set_options(start_vars) + + self.gen_qtl_results(tempdata) + + #Get chromosome lengths for drawing the interval map plot + chromosome_mb_lengths = {} + for key in self.species.chromosomes.chromosomes.keys(): + chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length + + self.js_data = dict( + lrs_lod = self.lrs_lod, + chromosomes = chromosome_mb_lengths, + qtl_results = self.qtl_results, + ) + + def set_options(self, start_vars): + """Sets various options (physical/genetic mapping, # permutations, which chromosome""" + + self.plot_scale = start_vars['scale'] + #if self.plotScale == 'physic' and not fd.genotype.Mbmap: + # self.plotScale = 'morgan' + self.num_permutations = start_vars['num_permutations'] + self.do_bootstrap = start_vars['do_bootstrap'] + self.control_locus = start_vars['control_locus'] + self.selected_chr = start_vars['selected_chr'] + self.weighted_regression = start_vars['weighted'] + self.lrs_lod = start_vars['lrs_lod'] + + + def gen_qtl_results(self, tempdata): + """Generates qtl results for plotting interval map""" + + self.dataset.group.get_markers() + self.dataset.read_genotype_file() + + samples, values, variances = self.trait.export_informative() + if self.control_locus: + if self.weighted_regression: + self.qtl_results = self.dataset.genotype.regression(strains = samples, + trait = values, + variance = variances, + control = self.control_locus) + else: + self.qtl_results = self.dataset.genotype.regression(strains = samples, + trait = values, + control = self.control_locus) + else: + if self.weighted_regression: + self.qtl_results = self.dataset.genotype.regression(strains = samples, + trait = values, + variance = variances) + else: + self.qtl_results = self.dataset.genotype.regression(strains = samples, + trait = values) + + + #pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals]) + + #if self.dataset.group.species == "human": + # p_values, t_stats = self.gen_human_results(pheno_vector, tempdata) + #else: + genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] + + no_val_samples = self.identify_empty_samples() + trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) + + genotype_matrix = np.array(trimmed_genotype_data).T + + #t_stats, p_values = lmm.run( + # pheno_vector, + # genotype_matrix, + # restricted_max_likelihood=True, + # refit=False, + # temp_data=tempdata + #) + + #self.dataset.group.markers.add_pvalues(p_values) + + #self.qtl_results = self.dataset.group.markers.markers + + def gen_qtl_results_2(self, tempdata): + """Generates qtl results for plotting interval map""" + + self.dataset.group.get_markers() + self.dataset.read_genotype_file() + + pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals]) + + #if self.dataset.group.species == "human": + # p_values, t_stats = self.gen_human_results(pheno_vector, tempdata) + #else: + genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] + + no_val_samples = self.identify_empty_samples() + trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) + + genotype_matrix = np.array(trimmed_genotype_data).T + + t_stats, p_values = lmm.run( + pheno_vector, + genotype_matrix, + restricted_max_likelihood=True, + refit=False, + temp_data=tempdata + ) + + self.dataset.group.markers.add_pvalues(p_values) + + self.qtl_results = self.dataset.group.markers.markers + + + def identify_empty_samples(self): + no_val_samples = [] + for sample_count, val in enumerate(self.vals): + if val == "x": + no_val_samples.append(sample_count) + return no_val_samples + + + def trim_genotypes(self, genotype_data, no_value_samples): + trimmed_genotype_data = [] + for marker in genotype_data: + new_genotypes = [] + for item_count, genotype in enumerate(marker): + if item_count in no_value_samples: + continue + try: + genotype = float(genotype) + except ValueError: + genotype = np.nan + pass + new_genotypes.append(genotype) + trimmed_genotype_data.append(new_genotypes) + return trimmed_genotype_data + + #def get_qtl_results(self): + # """Gets the LOD (or LRS) score at each marker in order do the qtl mapping""" + # + # + # + # #self.genotype = self.genotype.addinterval() + # #resultSlice = [] + # #controlGeno = [] + # + # #if self.multipleInterval: + # # self.suggestive = 0 + # # self.significance = 0 + # # if self.selectedChr > -1: + # # self.genotype.chromosome = [self.genotype[self.selectedChr]] + # #else: + # #single interval mapping + # #try: + # # self.suggestive = float(fd.formdata.getvalue('permSuggestive')) + # # self.significance = float(fd.formdata.getvalue('permSignificance')) + # #except: + # # self.suggestive = None + # # self.significance = None + # + # #NOT DOING MULTIPLE TRAITS YET, BUT WILL NEED TO LATER + # #_strains, _vals, _vars = self.traitList[0].exportInformative(weightedRegression) + # + # #if webqtlUtil.ListNotNull(_vars): + # # pass + # #else: + # # weightedRegression = 0 + # # _strains, _vals, _vars = self.traitList[0].exportInformative() + # + # ##locate genotype of control Locus + # #if self.controlLocus: + # # controlGeno2 = [] + # # _FIND = 0 + # # for _chr in self.genotype: + # # for _locus in _chr: + # # if _locus.name == self.controlLocus: + # # controlGeno2 = _locus.genotype + # # _FIND = 1 + # # break + # # if _FIND: + # # break + # # if controlGeno2: + # # _prgy = list(self.genotype.prgy) + # # for _strain in _strains: + # # _idx = _prgy.index(_strain) + # # controlGeno.append(controlGeno2[_idx]) + # # else: + # # return "The control marker you selected is not in the genofile." + # # + # # + # # if self.significance and self.suggestive: + # # pass + # # else: + # # if self.permChecked: + # # if weightedRegression: + # # self.LRSArray = self.genotype.permutation(strains = _strains, trait = _vals, + # # variance = _vars, nperm=fd.nperm) + # # else: + # # self.LRSArray = self.genotype.permutation(strains = _strains, trait = _vals, + # # nperm=fd.nperm) + # # self.suggestive = self.LRSArray[int(fd.nperm*0.37-1)] + # # self.significance = self.LRSArray[int(fd.nperm*0.95-1)] + # # + # # else: + # # self.suggestive = 9.2 + # # self.significance = 16.1 + # # + # # #calculating bootstrap + # # #from now on, genotype could only contain a single chromosome + # # #permutation need to be performed genome wide, this is not the case for bootstrap + # # + # # #due to the design of qtlreaper, composite regression need to be performed genome wide + # # if not self.controlLocus and self.selectedChr > -1: + # # self.genotype.chromosome = [self.genotype[self.selectedChr]] + # # elif self.selectedChr > -1: #self.controlLocus and self.selectedChr > -1 + # # lociPerChr = map(len, self.genotype) + # # resultSlice = reduce(lambda X, Y: X+Y, lociPerChr[:self.selectedChr], 0) + # # resultSlice = [resultSlice,resultSlice+lociPerChr[self.selectedChr]] + # # else: + # # pass + # + # #calculate QTL for each trait + # self.qtl_results = [] + # + # #for thisTrait in self.traitList: + # _strains, _vals, _vars = thisTrait.exportInformative(weightedRegression) + # if self.controlLocus: + # if weightedRegression: + # qtlresult = self.genotype.regression(strains = _strains, trait = _vals, + # variance = _vars, control = self.controlLocus) + # else: + # qtlresult = self.genotype.regression(strains = _strains, trait = _vals, + # control = self.controlLocus) + # if resultSlice: + # qtlresult = qtlresult[resultSlice[0]:resultSlice[1]] + # else: + # if weightedRegression: + # qtlresult = self.genotype.regression(strains = _strains, trait = _vals, + # variance = _vars) + # else: + # qtlresult = self.genotype.regression(strains = _strains, trait = _vals) + # + # self.qtlresults.append(qtlresult) + # + # if not self.multipleInterval: + # if self.controlLocus and self.selectedChr > -1: + # self.genotype.chromosome = [self.genotype[self.selectedChr]] + # + # if self.bootChecked: + # if controlGeno: + # self.bootResult = self.genotype.bootstrap(strains = _strains, trait = _vals, + # control = controlGeno, nboot=fd.nboot) + # elif weightedRegression: + # self.bootResult = self.genotype.bootstrap(strains = _strains, trait = _vals, + # variance = _vars, nboot=fd.nboot) + # else: + # self.bootResult = self.genotype.bootstrap(strains = _strains, trait = _vals, + # nboot=fd.nboot) + # else: + # self.bootResult = [] + diff --git a/wqflask/wqflask/my_pylmm/data/geno_to_ped.py b/wqflask/wqflask/my_pylmm/data/geno_to_ped.py new file mode 100644 index 00000000..9091ad9a --- /dev/null +++ b/wqflask/wqflask/my_pylmm/data/geno_to_ped.py @@ -0,0 +1,22 @@ +from __future__ import absolute_import, division, print_function + +import csv + +class ConvertToPed(object): + + def __init__(self, input_file, output_file): + self.input_file = input_file + self.output_file = output_file + + def convert(self): + + self.haplotype_notation = { + '@mat': "1", + '@pat': "0", + '@het': "0.5", + '@unk': "NA" + } + + with open(self.output_file, "w") as self.output_fh: + self.process_csv() +
\ No newline at end of file diff --git a/wqflask/wqflask/templates/base.html b/wqflask/wqflask/templates/base.html index 8efba6a7..a14eeb44 100644 --- a/wqflask/wqflask/templates/base.html +++ b/wqflask/wqflask/templates/base.html @@ -29,9 +29,9 @@ {% macro header(main, second) %} <header class="jumbotron subhead" id="overview"> <div class="container"> - <h1>Login</h1> + <h1>{{ main }}</h1> <p class="lead"> - Gain access to GeneNetwork. + {{ second }} </p> </div> </header> diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html index 7db8ea49..f3bb5531 100644 --- a/wqflask/wqflask/templates/correlation_page.html +++ b/wqflask/wqflask/templates/correlation_page.html @@ -6,12 +6,8 @@ <link rel="stylesheet" type="text/css" href="/static/packages/TableTools/media/css/TableTools.css" /> {% endblock %} {% block content %} - - <header class="jumbotron subhead" id="overview"> - <div class="container"> - <h1>Correlation</h1> - </div> - </header> + + {{ header("Correlation", 'Trait: {} Dataset: {}'.format(this_trait.name, dataset.name)) }} <table id="corr_results" class="table table-hover table-striped table-bordered"> <thead> diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html index 98682e57..d177a7bd 100644 --- a/wqflask/wqflask/templates/index_page.html +++ b/wqflask/wqflask/templates/index_page.html @@ -3,8 +3,6 @@ {% block content %} <!-- Start of body --> - - <header class="jumbotron subhead" id="overview"> <div class="container"> <h1>GeneNetwork</h1> diff --git a/wqflask/wqflask/templates/marker_regression.html b/wqflask/wqflask/templates/marker_regression.html index 9260acab..64d2e9b7 100644 --- a/wqflask/wqflask/templates/marker_regression.html +++ b/wqflask/wqflask/templates/marker_regression.html @@ -9,14 +9,8 @@ {% endblock %} {% block content %} <!-- Start of body --> - <header class="jumbotron subhead" id="overview"> - <div class="container"> - <h1>Marker Regression</h1> - <p class="lead"> - {{ this_trait.name }}: {{ this_trait.description_fmt }} - </p> - </div> - </header> + {{ header("Marker Regression", + '{}: {}'.format(this_trait.name, this_trait.description_fmt)) }} <div class="container"> <div> diff --git a/wqflask/wqflask/templates/quick_search.html b/wqflask/wqflask/templates/quick_search.html index b0e38708..2f268c5a 100644 --- a/wqflask/wqflask/templates/quick_search.html +++ b/wqflask/wqflask/templates/quick_search.html @@ -2,14 +2,9 @@ {% block title %}QuickSearch Results{% endblock %} {% block content %} <!-- Start of body --> - <header class="jumbotron subhead" id="overview"> - <div class="container"> - <h1>QuickSearch Results</h1> - <p class="lead"> - GeneNetwork found {{ numify(results|count, "record", "records") }}. - </p> - </div> - </header> + + {{ header("QuickSearch Results", + 'GeneNetwork found {}.'.format(numify(results|count, "record", "records"))) }} <div class="container"> <div class="page-header"> diff --git a/wqflask/wqflask/templates/search_result_page.html b/wqflask/wqflask/templates/search_result_page.html index 11f68bba..1fe7cce9 100644 --- a/wqflask/wqflask/templates/search_result_page.html +++ b/wqflask/wqflask/templates/search_result_page.html @@ -2,14 +2,8 @@ {% block title %}Search Results{% endblock %} {% block content %} <!-- Start of body --> - <header class="jumbotron subhead" id="overview"> - <div class="container"> - <h1>Search Results</h1> - <p class="lead"> - GeneNetwork found {{ numify(results|count, "record", "records") }}. - </p> - </div> - </header> + {{ header("Search Results", + 'GeneNetwork found {}.'.format(numify(results|count, "record", "records"))) }} <div class="container"> <div class="page-header"> diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 799245c3..e3c84de7 100644 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -7,14 +7,9 @@ {% endblock %} {% block content %} <!-- Start of body --> - <header class="jumbotron subhead" id="overview"> - <div class="container"> - <h1>{{ this_trait.symbol}}</h1> - <p class="lead"> - {{ this_trait.name }}: {{ this_trait.description_fmt }} - </p> - </div> - </header> + {{ header("{}".format(this_trait.symbol), + '{}: {}'.format(this_trait.name, this_trait.description_fmt)) }} + <form method="post" action="/corr_compute" name="trait_page" id="trait_data_form" class="form-horizontal"> diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 98b6039f..e6b99649 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -32,6 +32,7 @@ from base.data_set import create_datasets_list from wqflask.show_trait import show_trait from wqflask.show_trait import export_trait_data from wqflask.marker_regression import marker_regression +from wqflask.interval_mapping import interval_mapping from wqflask.correlation import show_corr_results from utility import temp_data @@ -246,6 +247,50 @@ def marker_regression_page(): return rendered_template +@app.route("/interval_mapping", methods=('POST',)) +def interval_mapping_page(): + initial_start_vars = request.form + temp_uuid = initial_start_vars['temp_uuid'] + wanted = ( + 'trait_id', + 'dataset', + 'suggestive' + ) + + start_vars = {} + for key, value in initial_start_vars.iteritems(): + if key in wanted or key.startswith(('value:')): + start_vars[key] = value + + version = "v1" + key = "interval_mapping:{}:".format(version) + json.dumps(start_vars, sort_keys=True) + print("key is:", pf(key)) + with Bench("Loading cache"): + result = Redis.get(key) + + if result: + print("Cache hit!!!") + with Bench("Loading results"): + result = pickle.loads(result) + else: + print("Cache miss!!!") + template_vars = interval_mapping.IntervalMapping(start_vars, temp_uuid) + + template_vars.js_data = json.dumps(template_vars.js_data, + default=json_default_handler, + indent=" ") + + result = template_vars.__dict__ + + #causeerror + Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL)) + Redis.expire(key, 60*60) + + with Bench("Rendering template"): + rendered_template = render_template("interval_mapping.html", **result) + + return rendered_template + @app.route("/corr_compute", methods=('POST',)) def corr_compute_page(): print("In corr_compute, request.form is:", pf(request.form)) |