about summary refs log tree commit diff
path: root/wqflask
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask')
-rwxr-xr-xwqflask/base/data_set.py20
-rw-r--r--wqflask/maintenance/quick_search_table.py8
-rw-r--r--wqflask/wqflask/interval_mapping/interval_mapping.py312
-rw-r--r--wqflask/wqflask/my_pylmm/data/geno_to_ped.py22
4 files changed, 350 insertions, 12 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index beb62bd7..9fa7beb3 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -168,13 +168,13 @@ class Markers(object):
         
         for marker, p_value in itertools.izip(self.markers, p_values):
             marker['p_value'] = p_value
-            if marker['p_value'] == 0:
-                marker['lod_score'] = 0
-                marker['lrs_value'] = 0
-            else:
-                marker['lod_score'] = -math.log10(marker['p_value'])
-                #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
-                marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+            if math.isnan(marker['p_value']):
+                print("p_value is:", marker['p_value'])
+            marker['lod_score'] = -math.log10(marker['p_value'])
+            #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
+            marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+        
+        
 
 
 class HumanMarkers(Markers):
@@ -189,6 +189,8 @@ class HumanMarkers(Markers):
             marker['name'] = splat[1]
             marker['Mb'] = float(splat[3]) / 1000000
             self.markers.append(marker)
+            
+        #print("markers is: ", pf(self.markers))
 
 
     def add_pvalues(self, p_values):
@@ -315,10 +317,10 @@ class DatasetGroup(object):
 
         #determine default genotype object
         if self.incparentsf1 and genotype_1.type != "intercross":
-            genotype = genotype_2
+            self.genotype = genotype_2
         else:
             self.incparentsf1 = 0
-            genotype = genotype_1
+            self.genotype = genotype_1
 
         self.samplelist = list(genotype.prgy)
 
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index eef61857..f0075df0 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -13,8 +13,10 @@ each trait, its dataset, and several columns determined by its trait type (pheno
 
 from __future__ import absolute_import, division, print_function
 
-#import sys
-#sys.path.append("../../..")
+	# We do this here so we can use zach_settings
+# Not to avoid other absoulte_imports
+import sys
+sys.path.append("../../..")
 
 import simplejson as json
 
@@ -504,4 +506,4 @@ def main():
     ProbeSetXRef.run()
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/wqflask/wqflask/interval_mapping/interval_mapping.py b/wqflask/wqflask/interval_mapping/interval_mapping.py
new file mode 100644
index 00000000..5d660224
--- /dev/null
+++ b/wqflask/wqflask/interval_mapping/interval_mapping.py
@@ -0,0 +1,312 @@
+from __future__ import absolute_import, print_function, division
+
+from base.trait import GeneralTrait
+from base import data_set  #import create_dataset
+
+from pprint import pformat as pf
+
+import string
+import sys
+import os
+import collections
+
+import numpy as np
+from scipy import linalg
+import rpy2.robjects
+
+import simplejson as json
+
+#from redis import Redis
+
+
+from base.trait import GeneralTrait
+from base import data_set
+from base import species
+from base import webqtlConfig
+from wqflask.my_pylmm.data import prep_data
+from wqflask.my_pylmm.pyLMM import lmm
+from wqflask.my_pylmm.pyLMM import input
+from utility import helper_functions
+from utility import Plot, Bunch
+from utility import temp_data
+
+from utility.benchmark import Bench
+
+
+class IntervalMapping(object):
+
+    def __init__(self, start_vars, temp_uuid):
+
+        #Currently only getting trait data for one trait, but will need
+        #to change this to accept multiple traits once the collection page is implemented
+        helper_functions.get_species_dataset_trait(self, start_vars)
+
+        tempdata = temp_data.TempData(temp_uuid)
+        
+        self.samples = [] # Want only ones with values
+        self.vals = []
+
+        for sample in self.dataset.group.samplelist:
+            value = start_vars['value:' + sample]
+            self.samples.append(str(sample))
+            self.vals.append(value)
+ 
+        self.set_options(start_vars)
+ 
+        self.gen_qtl_results(tempdata)
+
+        #Get chromosome lengths for drawing the interval map plot
+        chromosome_mb_lengths = {}
+        for key in self.species.chromosomes.chromosomes.keys():
+            chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
+        
+        self.js_data = dict(
+            lrs_lod = self.lrs_lod,
+            chromosomes = chromosome_mb_lengths,
+            qtl_results = self.qtl_results,
+        )
+
+    def set_options(self, start_vars):
+        """Sets various options (physical/genetic mapping, # permutations, which chromosome"""
+        
+        self.plot_scale = start_vars['scale']
+        #if self.plotScale == 'physic' and not fd.genotype.Mbmap:
+        #    self.plotScale = 'morgan'
+        self.num_permutations = start_vars['num_permutations']
+        self.do_bootstrap = start_vars['do_bootstrap']
+        self.control_locus = start_vars['control_locus']
+        self.selected_chr = start_vars['selected_chr']
+        self.weighted_regression = start_vars['weighted']
+        self.lrs_lod = start_vars['lrs_lod']
+
+
+    def gen_qtl_results(self, tempdata):
+        """Generates qtl results for plotting interval map"""
+
+        self.dataset.group.get_markers()
+        self.dataset.read_genotype_file()
+
+        samples, values, variances = self.trait.export_informative()
+        if self.control_locus:
+            if self.weighted_regression:
+                qtl_result = self.dataset.genotype.regression(strains = samples,
+                                                              trait = values,
+                                                              variance = variances,
+                                                              control = self.control_locus)
+            else:
+                qtl_result = self.dataset.genotype.regression(strains = samples,
+                                                              trait = values,
+                                                              control = self.control_locus)
+        else:
+            if self.weighted_regression:
+                qtl_result = self.dataset.genotype.regression(strains = samples,
+                                                              trait = values,
+                                                              variance = variances)
+            else:
+                qtl_result = self.dataset.genotype.regression(strains = samples,
+                                                              trait = values)
+
+
+        pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals])
+
+        #if self.dataset.group.species == "human":
+        #    p_values, t_stats = self.gen_human_results(pheno_vector, tempdata)
+        #else:
+        genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
+        
+        no_val_samples = self.identify_empty_samples()
+        trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
+        
+        genotype_matrix = np.array(trimmed_genotype_data).T
+        
+        t_stats, p_values = lmm.run(
+            pheno_vector,
+            genotype_matrix,
+            restricted_max_likelihood=True,
+            refit=False,
+            temp_data=tempdata
+        )
+        
+        self.dataset.group.markers.add_pvalues(p_values)
+        
+        self.qtl_results = self.dataset.group.markers.markers
+
+    #def gen_qtl_results_2(self, tempdata):
+    #    """Generates qtl results for plotting interval map"""
+    #
+    #    self.dataset.group.get_markers()
+    #    self.dataset.read_genotype_file()
+    #
+    #    pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals])
+    #
+    #    #if self.dataset.group.species == "human":
+    #    #    p_values, t_stats = self.gen_human_results(pheno_vector, tempdata)
+    #    #else:
+    #    genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
+    #    
+    #    no_val_samples = self.identify_empty_samples()
+    #    trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
+    #    
+    #    genotype_matrix = np.array(trimmed_genotype_data).T
+    #    
+    #    t_stats, p_values = lmm.run(
+    #        pheno_vector,
+    #        genotype_matrix,
+    #        restricted_max_likelihood=True,
+    #        refit=False,
+    #        temp_data=tempdata
+    #    )
+    #    
+    #    self.dataset.group.markers.add_pvalues(p_values)
+    #    
+    #    self.qtl_results = self.dataset.group.markers.markers
+
+
+    def identify_empty_samples(self):
+        no_val_samples = []
+        for sample_count, val in enumerate(self.vals):
+            if val == "x":
+                no_val_samples.append(sample_count)
+        return no_val_samples
+        
+        
+    def trim_genotypes(self, genotype_data, no_value_samples):
+        trimmed_genotype_data = []
+        for marker in genotype_data:
+            new_genotypes = []
+            for item_count, genotype in enumerate(marker):
+                if item_count in no_value_samples:
+                    continue
+                try:
+                    genotype = float(genotype)
+                except ValueError:
+                    genotype = np.nan
+                    pass
+                new_genotypes.append(genotype)
+            trimmed_genotype_data.append(new_genotypes)
+        return trimmed_genotype_data
+
+    #def get_qtl_results(self):
+    #    """Gets the LOD (or LRS) score at each marker in order do the qtl mapping"""
+    #
+    #    
+    #    
+    #    #self.genotype = self.genotype.addinterval()
+    #    #resultSlice = []
+    #    #controlGeno = []
+    #    
+    #    #if self.multipleInterval:
+    #    #    self.suggestive = 0
+    #    #    self.significance = 0
+    #    #    if self.selectedChr > -1:
+    #    #        self.genotype.chromosome = [self.genotype[self.selectedChr]]
+    #    #else:
+    #    #single interval mapping
+    #    #try:
+    #    #    self.suggestive = float(fd.formdata.getvalue('permSuggestive'))
+    #    #    self.significance = float(fd.formdata.getvalue('permSignificance'))
+    #    #except:
+    #    #    self.suggestive = None
+    #    #    self.significance = None
+    #    
+    #    #NOT DOING MULTIPLE TRAITS YET, BUT WILL NEED TO LATER
+    #    #_strains, _vals, _vars = self.traitList[0].exportInformative(weightedRegression)
+    #    
+    #    #if webqtlUtil.ListNotNull(_vars):
+    #    #    pass
+    #    #else:
+    #    #    weightedRegression = 0
+    #    #    _strains, _vals, _vars = self.traitList[0].exportInformative()
+    #        
+    #    ##locate genotype of control Locus
+    #    #if self.controlLocus:
+    #    #    controlGeno2 = []
+    #    #    _FIND = 0
+    #    #    for _chr in self.genotype:
+    #    #        for _locus in _chr:
+    #    #            if _locus.name == self.controlLocus:
+    #    #                controlGeno2 = _locus.genotype
+    #    #                _FIND = 1
+    #    #                break
+    #    #        if _FIND:
+    #    #            break
+    #    #    if controlGeno2:
+    #    #        _prgy = list(self.genotype.prgy)
+    #    #        for _strain in _strains:
+    #    #            _idx = _prgy.index(_strain)
+    #    #            controlGeno.append(controlGeno2[_idx])
+    #    #    else:
+    #    #        return "The control marker you selected is not in the genofile."
+    #    #
+    #    #    
+    #    #    if self.significance and self.suggestive:
+    #    #        pass
+    #    #    else:
+    #    #        if self.permChecked:
+    #    #            if weightedRegression:
+    #    #                self.LRSArray = self.genotype.permutation(strains = _strains, trait = _vals, 
+    #    #                    variance = _vars, nperm=fd.nperm)
+    #    #            else:
+    #    #                self.LRSArray = self.genotype.permutation(strains = _strains, trait = _vals, 
+    #    #                    nperm=fd.nperm)
+    #    #            self.suggestive = self.LRSArray[int(fd.nperm*0.37-1)]
+    #    #            self.significance = self.LRSArray[int(fd.nperm*0.95-1)]
+    #    #
+    #    #        else:
+    #    #            self.suggestive = 9.2
+    #    #            self.significance = 16.1
+    #    #
+    #    #    #calculating bootstrap
+    #    #    #from now on, genotype could only contain a single chromosome 
+    #    #    #permutation need to be performed genome wide, this is not the case for bootstrap
+    #    #    
+    #    #    #due to the design of qtlreaper, composite regression need to be performed genome wide
+    #    #    if not self.controlLocus and self.selectedChr > -1:
+    #    #        self.genotype.chromosome = [self.genotype[self.selectedChr]]
+    #    #    elif self.selectedChr > -1: #self.controlLocus and self.selectedChr > -1
+    #    #        lociPerChr = map(len, self.genotype)
+    #    #        resultSlice = reduce(lambda X, Y: X+Y, lociPerChr[:self.selectedChr], 0)
+    #    #        resultSlice = [resultSlice,resultSlice+lociPerChr[self.selectedChr]]
+    #    #    else:
+    #    #        pass
+    #        
+    #    #calculate QTL for each trait
+    #    self.qtl_results = []
+    #
+    #    #for thisTrait in self.traitList:
+    #    _strains, _vals, _vars = thisTrait.exportInformative(weightedRegression)
+    #    if self.controlLocus:
+    #        if weightedRegression:
+    #            qtlresult = self.genotype.regression(strains = _strains, trait = _vals, 
+    #                    variance = _vars, control = self.controlLocus)
+    #        else:
+    #            qtlresult = self.genotype.regression(strains = _strains, trait = _vals,
+    #                control = self.controlLocus)
+    #        if resultSlice:
+    #            qtlresult = qtlresult[resultSlice[0]:resultSlice[1]]
+    #    else:
+    #        if weightedRegression:
+    #            qtlresult = self.genotype.regression(strains = _strains, trait = _vals, 
+    #                    variance = _vars)
+    #        else:
+    #            qtlresult = self.genotype.regression(strains = _strains, trait = _vals)
+    #
+    #    self.qtlresults.append(qtlresult)
+    #    
+    #    if not self.multipleInterval:
+    #        if self.controlLocus and self.selectedChr > -1:
+    #            self.genotype.chromosome = [self.genotype[self.selectedChr]]
+    #            
+    #        if self.bootChecked:
+    #            if controlGeno:
+    #                self.bootResult = self.genotype.bootstrap(strains = _strains, trait = _vals,
+    #                    control = controlGeno, nboot=fd.nboot)
+    #            elif weightedRegression:
+    #                self.bootResult = self.genotype.bootstrap(strains = _strains, trait = _vals, 
+    #                    variance = _vars, nboot=fd.nboot)
+    #            else:
+    #                self.bootResult = self.genotype.bootstrap(strains = _strains, trait = _vals, 
+    #                    nboot=fd.nboot)
+    #        else:
+    #            self.bootResult = []
+
diff --git a/wqflask/wqflask/my_pylmm/data/geno_to_ped.py b/wqflask/wqflask/my_pylmm/data/geno_to_ped.py
new file mode 100644
index 00000000..9091ad9a
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/data/geno_to_ped.py
@@ -0,0 +1,22 @@
+from __future__ import absolute_import, division, print_function
+
+import csv
+
+class ConvertToPed(object):
+
+    def __init__(self, input_file, output_file):
+        self.input_file = input_file
+        self.output_file = output_file
+        
+    def convert(self):
+    
+        self.haplotype_notation = {
+        '@mat': "1",
+        '@pat': "0",
+        '@het': "0.5",
+        '@unk': "NA"
+        }
+        
+        with open(self.output_file, "w") as self.output_fh:
+            self.process_csv()
+        
\ No newline at end of file