aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask')
-rwxr-xr-xwqflask/base/data_set.py20
-rw-r--r--wqflask/maintenance/quick_search_table.py8
-rw-r--r--wqflask/wqflask/interval_mapping/interval_mapping.py312
-rw-r--r--wqflask/wqflask/my_pylmm/data/geno_to_ped.py22
4 files changed, 350 insertions, 12 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index beb62bd7..9fa7beb3 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -168,13 +168,13 @@ class Markers(object):
for marker, p_value in itertools.izip(self.markers, p_values):
marker['p_value'] = p_value
- if marker['p_value'] == 0:
- marker['lod_score'] = 0
- marker['lrs_value'] = 0
- else:
- marker['lod_score'] = -math.log10(marker['p_value'])
- #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
- marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+ if math.isnan(marker['p_value']):
+ print("p_value is:", marker['p_value'])
+ marker['lod_score'] = -math.log10(marker['p_value'])
+ #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
+ marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+
+
class HumanMarkers(Markers):
@@ -189,6 +189,8 @@ class HumanMarkers(Markers):
marker['name'] = splat[1]
marker['Mb'] = float(splat[3]) / 1000000
self.markers.append(marker)
+
+ #print("markers is: ", pf(self.markers))
def add_pvalues(self, p_values):
@@ -315,10 +317,10 @@ class DatasetGroup(object):
#determine default genotype object
if self.incparentsf1 and genotype_1.type != "intercross":
- genotype = genotype_2
+ self.genotype = genotype_2
else:
self.incparentsf1 = 0
- genotype = genotype_1
+ self.genotype = genotype_1
self.samplelist = list(genotype.prgy)
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index eef61857..f0075df0 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -13,8 +13,10 @@ each trait, its dataset, and several columns determined by its trait type (pheno
from __future__ import absolute_import, division, print_function
-#import sys
-#sys.path.append("../../..")
+ # We do this here so we can use zach_settings
+# Not to avoid other absoulte_imports
+import sys
+sys.path.append("../../..")
import simplejson as json
@@ -504,4 +506,4 @@ def main():
ProbeSetXRef.run()
if __name__ == "__main__":
- main() \ No newline at end of file
+ main()
diff --git a/wqflask/wqflask/interval_mapping/interval_mapping.py b/wqflask/wqflask/interval_mapping/interval_mapping.py
new file mode 100644
index 00000000..5d660224
--- /dev/null
+++ b/wqflask/wqflask/interval_mapping/interval_mapping.py
@@ -0,0 +1,312 @@
+from __future__ import absolute_import, print_function, division
+
+from base.trait import GeneralTrait
+from base import data_set #import create_dataset
+
+from pprint import pformat as pf
+
+import string
+import sys
+import os
+import collections
+
+import numpy as np
+from scipy import linalg
+import rpy2.robjects
+
+import simplejson as json
+
+#from redis import Redis
+
+
+from base.trait import GeneralTrait
+from base import data_set
+from base import species
+from base import webqtlConfig
+from wqflask.my_pylmm.data import prep_data
+from wqflask.my_pylmm.pyLMM import lmm
+from wqflask.my_pylmm.pyLMM import input
+from utility import helper_functions
+from utility import Plot, Bunch
+from utility import temp_data
+
+from utility.benchmark import Bench
+
+
+class IntervalMapping(object):
+
+ def __init__(self, start_vars, temp_uuid):
+
+ #Currently only getting trait data for one trait, but will need
+ #to change this to accept multiple traits once the collection page is implemented
+ helper_functions.get_species_dataset_trait(self, start_vars)
+
+ tempdata = temp_data.TempData(temp_uuid)
+
+ self.samples = [] # Want only ones with values
+ self.vals = []
+
+ for sample in self.dataset.group.samplelist:
+ value = start_vars['value:' + sample]
+ self.samples.append(str(sample))
+ self.vals.append(value)
+
+ self.set_options(start_vars)
+
+ self.gen_qtl_results(tempdata)
+
+ #Get chromosome lengths for drawing the interval map plot
+ chromosome_mb_lengths = {}
+ for key in self.species.chromosomes.chromosomes.keys():
+ chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
+
+ self.js_data = dict(
+ lrs_lod = self.lrs_lod,
+ chromosomes = chromosome_mb_lengths,
+ qtl_results = self.qtl_results,
+ )
+
+ def set_options(self, start_vars):
+ """Sets various options (physical/genetic mapping, # permutations, which chromosome"""
+
+ self.plot_scale = start_vars['scale']
+ #if self.plotScale == 'physic' and not fd.genotype.Mbmap:
+ # self.plotScale = 'morgan'
+ self.num_permutations = start_vars['num_permutations']
+ self.do_bootstrap = start_vars['do_bootstrap']
+ self.control_locus = start_vars['control_locus']
+ self.selected_chr = start_vars['selected_chr']
+ self.weighted_regression = start_vars['weighted']
+ self.lrs_lod = start_vars['lrs_lod']
+
+
+ def gen_qtl_results(self, tempdata):
+ """Generates qtl results for plotting interval map"""
+
+ self.dataset.group.get_markers()
+ self.dataset.read_genotype_file()
+
+ samples, values, variances = self.trait.export_informative()
+ if self.control_locus:
+ if self.weighted_regression:
+ qtl_result = self.dataset.genotype.regression(strains = samples,
+ trait = values,
+ variance = variances,
+ control = self.control_locus)
+ else:
+ qtl_result = self.dataset.genotype.regression(strains = samples,
+ trait = values,
+ control = self.control_locus)
+ else:
+ if self.weighted_regression:
+ qtl_result = self.dataset.genotype.regression(strains = samples,
+ trait = values,
+ variance = variances)
+ else:
+ qtl_result = self.dataset.genotype.regression(strains = samples,
+ trait = values)
+
+
+ pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals])
+
+ #if self.dataset.group.species == "human":
+ # p_values, t_stats = self.gen_human_results(pheno_vector, tempdata)
+ #else:
+ genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
+
+ no_val_samples = self.identify_empty_samples()
+ trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
+
+ genotype_matrix = np.array(trimmed_genotype_data).T
+
+ t_stats, p_values = lmm.run(
+ pheno_vector,
+ genotype_matrix,
+ restricted_max_likelihood=True,
+ refit=False,
+ temp_data=tempdata
+ )
+
+ self.dataset.group.markers.add_pvalues(p_values)
+
+ self.qtl_results = self.dataset.group.markers.markers
+
+ #def gen_qtl_results_2(self, tempdata):
+ # """Generates qtl results for plotting interval map"""
+ #
+ # self.dataset.group.get_markers()
+ # self.dataset.read_genotype_file()
+ #
+ # pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals])
+ #
+ # #if self.dataset.group.species == "human":
+ # # p_values, t_stats = self.gen_human_results(pheno_vector, tempdata)
+ # #else:
+ # genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
+ #
+ # no_val_samples = self.identify_empty_samples()
+ # trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
+ #
+ # genotype_matrix = np.array(trimmed_genotype_data).T
+ #
+ # t_stats, p_values = lmm.run(
+ # pheno_vector,
+ # genotype_matrix,
+ # restricted_max_likelihood=True,
+ # refit=False,
+ # temp_data=tempdata
+ # )
+ #
+ # self.dataset.group.markers.add_pvalues(p_values)
+ #
+ # self.qtl_results = self.dataset.group.markers.markers
+
+
+ def identify_empty_samples(self):
+ no_val_samples = []
+ for sample_count, val in enumerate(self.vals):
+ if val == "x":
+ no_val_samples.append(sample_count)
+ return no_val_samples
+
+
+ def trim_genotypes(self, genotype_data, no_value_samples):
+ trimmed_genotype_data = []
+ for marker in genotype_data:
+ new_genotypes = []
+ for item_count, genotype in enumerate(marker):
+ if item_count in no_value_samples:
+ continue
+ try:
+ genotype = float(genotype)
+ except ValueError:
+ genotype = np.nan
+ pass
+ new_genotypes.append(genotype)
+ trimmed_genotype_data.append(new_genotypes)
+ return trimmed_genotype_data
+
+ #def get_qtl_results(self):
+ # """Gets the LOD (or LRS) score at each marker in order do the qtl mapping"""
+ #
+ #
+ #
+ # #self.genotype = self.genotype.addinterval()
+ # #resultSlice = []
+ # #controlGeno = []
+ #
+ # #if self.multipleInterval:
+ # # self.suggestive = 0
+ # # self.significance = 0
+ # # if self.selectedChr > -1:
+ # # self.genotype.chromosome = [self.genotype[self.selectedChr]]
+ # #else:
+ # #single interval mapping
+ # #try:
+ # # self.suggestive = float(fd.formdata.getvalue('permSuggestive'))
+ # # self.significance = float(fd.formdata.getvalue('permSignificance'))
+ # #except:
+ # # self.suggestive = None
+ # # self.significance = None
+ #
+ # #NOT DOING MULTIPLE TRAITS YET, BUT WILL NEED TO LATER
+ # #_strains, _vals, _vars = self.traitList[0].exportInformative(weightedRegression)
+ #
+ # #if webqtlUtil.ListNotNull(_vars):
+ # # pass
+ # #else:
+ # # weightedRegression = 0
+ # # _strains, _vals, _vars = self.traitList[0].exportInformative()
+ #
+ # ##locate genotype of control Locus
+ # #if self.controlLocus:
+ # # controlGeno2 = []
+ # # _FIND = 0
+ # # for _chr in self.genotype:
+ # # for _locus in _chr:
+ # # if _locus.name == self.controlLocus:
+ # # controlGeno2 = _locus.genotype
+ # # _FIND = 1
+ # # break
+ # # if _FIND:
+ # # break
+ # # if controlGeno2:
+ # # _prgy = list(self.genotype.prgy)
+ # # for _strain in _strains:
+ # # _idx = _prgy.index(_strain)
+ # # controlGeno.append(controlGeno2[_idx])
+ # # else:
+ # # return "The control marker you selected is not in the genofile."
+ # #
+ # #
+ # # if self.significance and self.suggestive:
+ # # pass
+ # # else:
+ # # if self.permChecked:
+ # # if weightedRegression:
+ # # self.LRSArray = self.genotype.permutation(strains = _strains, trait = _vals,
+ # # variance = _vars, nperm=fd.nperm)
+ # # else:
+ # # self.LRSArray = self.genotype.permutation(strains = _strains, trait = _vals,
+ # # nperm=fd.nperm)
+ # # self.suggestive = self.LRSArray[int(fd.nperm*0.37-1)]
+ # # self.significance = self.LRSArray[int(fd.nperm*0.95-1)]
+ # #
+ # # else:
+ # # self.suggestive = 9.2
+ # # self.significance = 16.1
+ # #
+ # # #calculating bootstrap
+ # # #from now on, genotype could only contain a single chromosome
+ # # #permutation need to be performed genome wide, this is not the case for bootstrap
+ # #
+ # # #due to the design of qtlreaper, composite regression need to be performed genome wide
+ # # if not self.controlLocus and self.selectedChr > -1:
+ # # self.genotype.chromosome = [self.genotype[self.selectedChr]]
+ # # elif self.selectedChr > -1: #self.controlLocus and self.selectedChr > -1
+ # # lociPerChr = map(len, self.genotype)
+ # # resultSlice = reduce(lambda X, Y: X+Y, lociPerChr[:self.selectedChr], 0)
+ # # resultSlice = [resultSlice,resultSlice+lociPerChr[self.selectedChr]]
+ # # else:
+ # # pass
+ #
+ # #calculate QTL for each trait
+ # self.qtl_results = []
+ #
+ # #for thisTrait in self.traitList:
+ # _strains, _vals, _vars = thisTrait.exportInformative(weightedRegression)
+ # if self.controlLocus:
+ # if weightedRegression:
+ # qtlresult = self.genotype.regression(strains = _strains, trait = _vals,
+ # variance = _vars, control = self.controlLocus)
+ # else:
+ # qtlresult = self.genotype.regression(strains = _strains, trait = _vals,
+ # control = self.controlLocus)
+ # if resultSlice:
+ # qtlresult = qtlresult[resultSlice[0]:resultSlice[1]]
+ # else:
+ # if weightedRegression:
+ # qtlresult = self.genotype.regression(strains = _strains, trait = _vals,
+ # variance = _vars)
+ # else:
+ # qtlresult = self.genotype.regression(strains = _strains, trait = _vals)
+ #
+ # self.qtlresults.append(qtlresult)
+ #
+ # if not self.multipleInterval:
+ # if self.controlLocus and self.selectedChr > -1:
+ # self.genotype.chromosome = [self.genotype[self.selectedChr]]
+ #
+ # if self.bootChecked:
+ # if controlGeno:
+ # self.bootResult = self.genotype.bootstrap(strains = _strains, trait = _vals,
+ # control = controlGeno, nboot=fd.nboot)
+ # elif weightedRegression:
+ # self.bootResult = self.genotype.bootstrap(strains = _strains, trait = _vals,
+ # variance = _vars, nboot=fd.nboot)
+ # else:
+ # self.bootResult = self.genotype.bootstrap(strains = _strains, trait = _vals,
+ # nboot=fd.nboot)
+ # else:
+ # self.bootResult = []
+
diff --git a/wqflask/wqflask/my_pylmm/data/geno_to_ped.py b/wqflask/wqflask/my_pylmm/data/geno_to_ped.py
new file mode 100644
index 00000000..9091ad9a
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/data/geno_to_ped.py
@@ -0,0 +1,22 @@
+from __future__ import absolute_import, division, print_function
+
+import csv
+
+class ConvertToPed(object):
+
+ def __init__(self, input_file, output_file):
+ self.input_file = input_file
+ self.output_file = output_file
+
+ def convert(self):
+
+ self.haplotype_notation = {
+ '@mat': "1",
+ '@pat': "0",
+ '@het': "0.5",
+ '@unk': "NA"
+ }
+
+ with open(self.output_file, "w") as self.output_fh:
+ self.process_csv()
+ \ No newline at end of file