about summary refs log tree commit diff
diff options
context:
space:
mode:
authorZachary Sloan2014-08-25 17:13:06 +0000
committerZachary Sloan2014-08-25 17:13:06 +0000
commit7605d126bec2ea63dd36b7a786617d15901b4a1a (patch)
tree7c372ced1948e9b0feb012036b1a36c31caab737
parent7ff0dcda04dc853620bb5162180f18db596f1574 (diff)
downloadgenenetwork2-7605d126bec2ea63dd36b7a786617d15901b4a1a.tar.gz
Added permutations to pyLMM
-rwxr-xr-xwqflask/wqflask/marker_regression/marker_regression.py1614
-rwxr-xr-xwqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee1
-rwxr-xr-xwqflask/wqflask/templates/show_trait_mapping_tools.html6
3 files changed, 846 insertions, 775 deletions
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index bb67a3ba..a1b600a4 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -1,775 +1,839 @@
-from __future__ import absolute_import, print_function, division

-

-from base.trait import GeneralTrait

-from base import data_set  #import create_dataset

-

-from pprint import pformat as pf

-

-import string

-import sys

-import datetime

-import os

-import collections

-import uuid

-

-import rpy2.robjects as robjects

-import numpy as np

-from scipy import linalg

-

-import cPickle as pickle

-

-import simplejson as json

-

-from redis import Redis

-Redis = Redis()

-

-from flask import Flask, g

-

-from base.trait import GeneralTrait

-from base import data_set

-from base import species

-from base import webqtlConfig

-from utility import webqtlUtil

-from wqflask.my_pylmm.data import prep_data

-from wqflask.my_pylmm.pyLMM import lmm

-from wqflask.my_pylmm.pyLMM import input

-from utility import helper_functions

-from utility import Plot, Bunch

-from utility import temp_data

-

-from utility.benchmark import Bench

-

-

-class MarkerRegression(object):

-

-    def __init__(self, start_vars, temp_uuid):

-

-        helper_functions.get_species_dataset_trait(self, start_vars)

-

-        #tempdata = temp_data.TempData(temp_uuid)

-        

-        self.json_data = {}

-        self.json_data['lodnames'] = ['lod.hk']

-        

-        self.samples = [] # Want only ones with values

-        self.vals = []

-

-        for sample in self.dataset.group.samplelist:

-            value = start_vars['value:' + sample]

-            self.samples.append(str(sample))

-            self.vals.append(value)

- 

-        self.mapping_method = start_vars['method']

-        if start_vars['manhattan_plot'] == "true":

-            self.manhattan_plot = True

-        else:

-            self.manhattan_plot = False

-        self.maf = start_vars['maf'] # Minor allele frequency

-        self.suggestive = ""

-        self.significant = ""

-        #print("self.maf:", self.maf)

- 

-        self.dataset.group.get_markers()

-        if self.mapping_method == "gemma":

-            qtl_results = self.run_gemma()

-        elif self.mapping_method == "rqtl_plink":

-            qtl_results = self.run_rqtl_plink()

-        elif self.mapping_method == "rqtl_geno":

-            self.num_perm = start_vars['num_perm']

-            self.control = start_vars['control_marker']

-

-            print("doing rqtl_geno")

-            qtl_results = self.run_rqtl_geno()

-            print("qtl_results:", qtl_results)

-        elif self.mapping_method == "plink":

-            qtl_results = self.run_plink()

-            #print("qtl_results:", pf(qtl_results))

-        elif self.mapping_method == "pylmm":

-            print("RUNNING PYLMM")

-            #self.qtl_results = self.gen_data(tempdata)

-            qtl_results = self.gen_data(str(temp_uuid))

-        else:

-            print("RUNNING NOTHING")

-            

-        self.lod_cutoff = 2    

-        self.filtered_markers = []

-        highest_chr = 1 #This is needed in order to convert the highest chr to X/Y

-        for marker in qtl_results:

-            if marker['chr'] > 0 or marker['chr'] == "X" or marker['chr'] == "X/Y":

-                if marker['chr'] > highest_chr or marker['chr'] == "X" or marker['chr'] == "X/Y":

-                    highest_chr = marker['chr']

-                if 'lod_score' in marker:

-                    self.filtered_markers.append(marker)

-

-        self.json_data['chr'] = []

-        self.json_data['pos'] = []

-        self.json_data['lod.hk'] = []

-        self.json_data['markernames'] = []

-

-        self.json_data['suggestive'] = self.suggestive

-        self.json_data['significant'] = self.significant

-

-        #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary

-        self.qtl_results = []

-        for qtl in self.filtered_markers:

-            print("lod score is:", qtl['lod_score'])

-            if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":

-                print("changing to X")

-                self.json_data['chr'].append("X")

-            else:

-                self.json_data['chr'].append(str(qtl['chr']))

-            self.json_data['pos'].append(qtl['Mb'])

-            self.json_data['lod.hk'].append(str(qtl['lod_score']))

-            self.json_data['markernames'].append(qtl['name'])

-

-        #Get chromosome lengths for drawing the interval map plot

-        chromosome_mb_lengths = {}

-        self.json_data['chrnames'] = []

-        for key in self.species.chromosomes.chromosomes.keys():

-            self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length])

-            chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length

-        

-        print("json_data:", self.json_data)

-        

-

-        self.js_data = dict(

-            json_data = self.json_data,

-            this_trait = self.this_trait.name,

-            data_set = self.dataset.name,

-            maf = self.maf,

-            manhattan_plot = self.manhattan_plot,

-            chromosomes = chromosome_mb_lengths,

-            qtl_results = self.filtered_markers,

-        )

-

-    def run_gemma(self):

-        """Generates p-values for each marker using GEMMA"""

-        

-        #filename = webqtlUtil.genRandStr("{}_{}_".format(self.dataset.group.name, self.this_trait.name))

-        self.gen_pheno_txt_file()

-

-        os.chdir("/home/zas1024/gene/web/gemma")

-

-        gemma_command = './gemma -bfile %s -k output_%s.cXX.txt -lmm 1 -o %s_output' % (

-                                                                                                 self.dataset.group.name,

-                                                                                                 self.dataset.group.name,

-                                                                                                 self.dataset.group.name)

-        print("gemma_command:" + gemma_command)

-        

-        os.system(gemma_command)

-        

-        included_markers, p_values = self.parse_gemma_output()

-        

-        self.dataset.group.get_specified_markers(markers = included_markers)

-        

-        #for marker in self.dataset.group.markers.markers:

-        #    if marker['name'] not in included_markers:

-        #        print("marker:", marker)

-        #        self.dataset.group.markers.markers.remove(marker)

-        #        #del self.dataset.group.markers.markers[marker]

-        

-        self.dataset.group.markers.add_pvalues(p_values)

-

-        return self.dataset.group.markers.markers

-

-

-    def parse_gemma_output(self):

-        included_markers = []

-        p_values = []

-        with open("/home/zas1024/gene/web/gemma/output/{}_output.assoc.txt".format(self.dataset.group.name)) as output_file:

-            for line in output_file:

-                if line.startswith("chr"):

-                    continue

-                else:

-                    included_markers.append(line.split("\t")[1])

-                    p_values.append(float(line.split("\t")[10]))

-                    #p_values[line.split("\t")[1]] = float(line.split("\t")[10])

-        print("p_values: ", p_values)

-        return included_markers, p_values

-

-    def gen_pheno_txt_file(self):

-        """Generates phenotype file for GEMMA"""

-        

-        #with open("/home/zas1024/gene/web/gemma/tmp_pheno/{}.txt".format(filename), "w") as outfile:

-        #    for sample, i in enumerate(self.samples):

-        #        print("sample:" + str(i))

-        #        print("self.vals[i]:" + str(self.vals[sample]))

-        #        outfile.write(str(i) + "\t" + str(self.vals[sample]) + "\n")

-                

-        with open("/home/zas1024/gene/web/gemma/{}.fam".format(self.dataset.group.name), "w") as outfile:

-            for i, sample in enumerate(self.samples):

-                outfile.write(str(sample) + " " + str(sample) + " 0 0 0 " + str(self.vals[i]) + "\n")

-

-    #def gen_plink_for_gemma(self, filename):

-    #    

-    #    make_bed = "/home/zas1024/plink/plink --file /home/zas1024/plink/%s --noweb --no-fid --no-parents --no-sex --no-pheno --pheno %s%s.txt --out %s%s --make-bed" % (webqtlConfig.HTMLPATH,

-    #                                                                                                                                             webqtlConfig.HTMLPATH,

-    #                                                                                                                                             self.dataset.group.name,

-    #                                                                                                                                             webqtlConfig.TMPDIR,

-    #                                                                                                                                             filename,

-    #                                                                                                                                             webqtlConfig.TMPDIR,

-    #                                                                                                                                             filename)

-    #

-    #

-    

-    def run_rqtl_plink(self):

-        os.chdir("/home/zas1024/plink")

-        

-        output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name))

-        

-        self.gen_pheno_txt_file_plink(pheno_filename = output_filename)

-        

-        rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, webqtlConfig.TMPDIR, plink_output_filename, self.this_trait.name, self.maf, webqtlConfig.TMPDIR, plink_output_filename)

-        

-        os.system(rqtl_command)

-        

-        count, p_values = self.parse_rqtl_output(plink_output_filename)

-    

-    def run_rqtl_geno(self):

-        robjects.packages.importr("qtl")

-        robjects.r('the_cross <- read.cross(format="csvr", dir="/home/zas1024/PLINK2RQTL/test", file="BXD.csvr")')

-        if self.manhattan_plot:

-            robjects.r('the_cross <- calc.genoprob(the_cross)')

-        else:

-            robjects.r('the_cross <- calc.genoprob(the_cross, step=1, stepwidth="max")')

-        pheno_as_string = "c("

-        #for i, val in enumerate(self.vals):

-        #    if val == "x":

-        #        new_val == "NULL"

-        #    else:

-        #        new_val = val

-        #    if i < (len(self.vals) - 1):

-        #        pheno_as_string += str(new_val) + ","

-        #    else: pheno_as_string += str(new_val)

-        null_pos = []

-        for i, val in enumerate(self.vals):

-            if val == "x":

-                null_pos.append(i)

-                if i < (len(self.vals) - 1):

-                    pheno_as_string +=  "NA,"

-                else:

-                    pheno_as_string += "NA"

-            else:

-                if i < (len(self.vals) - 1):

-                    pheno_as_string += str(val) + ","

-                else:

-                    pheno_as_string += str(val)

-            

-        pheno_as_string += ")"

-        

-        robjects.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')')

-        

-        print("self.control:", self.control)

-        

-        if self.control != "":

-            control_markers = self.control.split(",")

-            control_string = ""

-            for i, control in enumerate(control_markers):

-                control_trait = GeneralTrait(name=str(control), dataset_name=str(self.dataset.group.name + "Geno"))

-                control_vals = []

-                for sample in self.dataset.group.samplelist:

-                    if sample in control_trait.data:

-                        control_vals.append(control_trait.data[sample].value)

-                    else:

-                        control_vals.append("x")

-                print("control_vals:", control_vals)

-                control_as_string = "c("

-                for j, val2 in enumerate(control_vals):

-                    if val2 == "x":

-                        if j < (len(control_vals) - 1):

-                            control_as_string +=  "NA,"

-                        else:

-                            control_as_string += "NA"

-                    else:

-                        if j < (len(control_vals) - 1):

-                            control_as_string += str(val2) + ","

-                        else:

-                            control_as_string += str(val2)

-                    #if i < (len(control_vals) - 1):

-                    #    control_as_string += str(new_val2) + ","

-                    #else:

-                    #    control_as_string += str(new_val2)

-                control_as_string += ")"

-                print("control_as_string:", control_as_string)

-                if i < (len(control_markers)-1):

-                    control_string += control_as_string + ","

-                else:

-                    control_string += control_as_string

-                    

-            robjects.r('covariates <- cbind( '+ control_string +')')

-            

-            r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, n.perm='+self.num_perm+', addcovar=covariates, intcovar=covariates[,'+ str(len(control_markers)) +'])'

-            print("r_string:", r_string)

-            

-            if int(self.num_perm) > 0:

-                thresholds = robjects.r(r_string)

-                self.suggestive, self.significant = self.process_rqtl_perm_results(results)

-                r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, addcovar=covariates, intcovar=covariates[,'+ str(len(control_markers)) +'])'

-                

-            #r_string = 'scanone(the_cross, pheno.col='+pheno_as_string+', addcovar='+control_as_string+')'

-            

-        else:

-        #r_string = 'scanone(the_cross, pheno.col='+pheno_as_string+', n.perm='+self.num_perm+')'

-            r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, n.perm='+self.num_perm+')'

-            if self.num_perm.isdigit() and int(self.num_perm) > 0:

-                results = robjects.r(r_string)

-                self.suggestive, self.significant = self.process_rqtl_perm_results(results)

-                r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16)'

-            

-        print("r_string:", r_string)

-        result_data_frame = robjects.r(r_string)

-        #print("results:", result_data_frame)

-

-        qtl_results = self.process_rqtl_results(result_data_frame)

-

-        return qtl_results

-    

-    def process_rqtl_perm_results(self, results):

-

-        perm_vals = []

-        for line in str(results).split("\n")[1:(int(self.num_perm)+1)]:

-            print("line:", line.split())

-            perm_vals.append(float(line.split()[1]))

-        

-        self.suggestive = np.percentile(np.array(perm_vals), 67)

-        self.significant = np.percentile(np.array(perm_vals), 95)

-        

-        return self.suggestive, self.significant

-            

-    

-    def process_rqtl_results(self, result):

-        qtl_results = []

-        

-        output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]

-        print("output", output)

-        

-        

-        for i, line in enumerate(result.iter_row()):

-            marker = {}

-            marker['name'] = result.rownames[i]

-            marker['chr'] = output[i][0]

-            marker['Mb'] = output[i][1]

-            marker['lod_score'] = output[i][2]

-            

-            qtl_results.append(marker)

-            

-        return qtl_results

-

-    def run_plink(self):

-    

-        os.chdir("/home/zas1024/plink")

-        

-        plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name))

-        

-        self.gen_pheno_txt_file_plink(pheno_filename = plink_output_filename)

-        

-        plink_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, webqtlConfig.TMPDIR, plink_output_filename, self.this_trait.name, self.maf, webqtlConfig.TMPDIR, plink_output_filename)

-        

-        os.system(plink_command)

-

-        count, p_values = self.parse_plink_output(plink_output_filename)

-        #gemma_command = './gemma -bfile %s -k output_%s.cXX.txt -lmm 1 -o %s_output' % (

-        #                                                                                         self.dataset.group.name,

-        #                                                                                         self.dataset.group.name,

-        #                                                                                         self.dataset.group.name)

-        #print("gemma_command:" + gemma_command)

-        #

-        #os.system(gemma_command)

-        #

-        #included_markers, p_values = self.parse_gemma_output()

-        #

-        #self.dataset.group.get_specified_markers(markers = included_markers)

-        

-        #for marker in self.dataset.group.markers.markers:

-        #    if marker['name'] not in included_markers:

-        #        print("marker:", marker)

-        #        self.dataset.group.markers.markers.remove(marker)

-        #        #del self.dataset.group.markers.markers[marker]

-        

-        print("p_values:", pf(p_values))

-        

-        self.dataset.group.markers.add_pvalues(p_values)

-

-        return self.dataset.group.markers.markers

-    

-    

-    def gen_pheno_txt_file_plink(self, pheno_filename = ''):

-        ped_sample_list = self.get_samples_from_ped_file()	

-        output_file = open("%s%s.txt" % (webqtlConfig.TMPDIR, pheno_filename), "wb")

-        header = 'FID\tIID\t%s\n' % self.this_trait.name

-        output_file.write(header)

-    

-        new_value_list = []

-        

-        #if valueDict does not include some strain, value will be set to -9999 as missing value

-        for i, sample in enumerate(ped_sample_list):

-            try:

-                value = self.vals[i]

-                value = str(value).replace('value=','')

-                value = value.strip()

-            except:

-                value = -9999

-    

-            new_value_list.append(value)

-            

-            

-        new_line = ''

-        for i, sample in enumerate(ped_sample_list):

-            j = i+1

-            value = new_value_list[i]

-            new_line += '%s\t%s\t%s\n'%(sample, sample, value)

-            

-            if j%1000 == 0:

-                output_file.write(newLine)

-                new_line = ''

-        

-        if new_line:

-            output_file.write(new_line)

-            

-        output_file.close()

-        

-    def gen_pheno_txt_file_rqtl(self, pheno_filename = ''):

-        ped_sample_list = self.get_samples_from_ped_file()	

-        output_file = open("%s%s.txt" % (webqtlConfig.TMPDIR, pheno_filename), "wb")

-        header = 'FID\tIID\t%s\n' % self.this_trait.name

-        output_file.write(header)

-    

-        new_value_list = []

-        

-        #if valueDict does not include some strain, value will be set to -9999 as missing value

-        for i, sample in enumerate(ped_sample_list):

-            try:

-                value = self.vals[i]

-                value = str(value).replace('value=','')

-                value = value.strip()

-            except:

-                value = -9999

-    

-            new_value_list.append(value)

-            

-            

-        new_line = ''

-        for i, sample in enumerate(ped_sample_list):

-            j = i+1

-            value = new_value_list[i]

-            new_line += '%s\t%s\t%s\n'%(sample, sample, value)

-            

-            if j%1000 == 0:

-                output_file.write(newLine)

-                new_line = ''

-        

-        if new_line:

-            output_file.write(new_line)

-            

-        output_file.close()

-    

-    # get strain name from ped file in order

-    def get_samples_from_ped_file(self):

-        

-        os.chdir("/home/zas1024/plink")

-        

-        ped_file= open("{}.ped".format(self.dataset.group.name),"r")

-        line = ped_file.readline()

-        sample_list=[]

-        

-        while line:

-            lineList = string.split(string.strip(line), '\t')

-            lineList = map(string.strip, lineList)

-            

-            sample_name = lineList[0]

-            sample_list.append(sample_name)

-            

-            line = ped_file.readline()

-        

-        return sample_list

-    

-    def parse_plink_output(self, output_filename):

-        plink_results={}

-    

-        threshold_p_value = 0.01

-    

-        result_fp = open("%s%s.qassoc"% (webqtlConfig.TMPDIR, output_filename), "rb")

-        

-        header_line = result_fp.readline()# read header line

-        line = result_fp.readline()

-        

-        value_list = [] # initialize value list, this list will include snp, bp and pvalue info

-        p_value_dict = {}

-        count = 0

-        

-        while line:

-            #convert line from str to list

-            line_list = self.build_line_list(line=line)

-    

-            # only keep the records whose chromosome name is in db

-            if self.species.chromosomes.chromosomes.has_key(int(line_list[0])) and line_list[-1] and line_list[-1].strip()!='NA':

-    

-                chr_name = self.species.chromosomes.chromosomes[int(line_list[0])]

-                snp = line_list[1]

-                BP = line_list[2]

-                p_value = float(line_list[-1])

-                if threshold_p_value >= 0 and threshold_p_value <= 1:

-                    if p_value < threshold_p_value:

-                        p_value_dict[snp] = p_value

-                

-                if plink_results.has_key(chr_name):

-                    value_list = plink_results[chr_name]

-                    

-                    # pvalue range is [0,1]

-                    if threshold_p_value >=0 and threshold_p_value <= 1:

-                        if p_value < threshold_p_value:

-                            value_list.append((snp, BP, p_value))

-                            count += 1

-                    

-                    plink_results[chr_name] = value_list

-                    value_list = []

-                else:

-                    if threshold_p_value >= 0 and threshold_p_value <= 1:

-                        if p_value < threshold_p_value:

-                            value_list.append((snp, BP, p_value))

-                            count += 1

-

-                    if value_list:

-                        plink_results[chr_name] = value_list

-

-                    value_list=[]

-

-                line = result_fp.readline()

-            else:

-                line = result_fp.readline()

-

-        #if p_value_list:

-        #    min_p_value = min(p_value_list)

-        #else:

-        #    min_p_value = 0

-            

-        return count, p_value_dict

-    

-    ######################################################

-    # input: line: str,one line read from file

-    # function: convert line from str to list; 

-    # output: lineList list

-    #######################################################

-    def build_line_list(self, line=None):

-        

-        line_list = string.split(string.strip(line),' ')# irregular number of whitespaces between columns

-        line_list = [item for item in line_list if item <>'']

-        line_list = map(string.strip, line_list)

-    

-        return line_list

-    

-    #def gen_data(self, tempdata):

-    def gen_data(self, temp_uuid):

-        """Generates p-values for each marker"""

-

-        pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals])

-

-        #lmm_uuid = str(uuid.uuid4())

-

-        key = "pylmm:input:" + temp_uuid

-        print("key is:", pf(key))

-        #with Bench("Loading cache"):

-        #    result = Redis.get(key)

-

-        if self.dataset.group.species == "human":

-            p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid)

-            #p_values = self.trim_results(p_values)

-            

-        else:

-            print("NOW CWD IS:", os.getcwd())

-            genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]

-            

-            no_val_samples = self.identify_empty_samples()

-            trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)

-            

-            genotype_matrix = np.array(trimmed_genotype_data).T

-

-            #print("pheno_vector: ", pf(pheno_vector))

-            #print("genotype_matrix: ", pf(genotype_matrix))

-            #print("genotype_matrix.shape: ", pf(genotype_matrix.shape))

-

-            #params = {"pheno_vector": pheno_vector,

-            #            "genotype_matrix": genotype_matrix,

-            #            "restricted_max_likelihood": True,

-            #            "refit": False,

-            #            "temp_data": tempdata}

-            

-            print("genotype_matrix:", str(genotype_matrix.tolist()))

-            print("pheno_vector:", str(pheno_vector.tolist()))

-            

-            params = dict(pheno_vector = pheno_vector.tolist(),

-                        genotype_matrix = genotype_matrix.tolist(),

-                        restricted_max_likelihood = True,

-                        refit = False,

-                        temp_uuid = temp_uuid,

-                        

-                        # meta data

-                        timestamp = datetime.datetime.now().isoformat(),

-                        )

-            

-            json_params = json.dumps(params)

-            #print("json_params:", json_params)

-            Redis.set(key, json_params)

-            Redis.expire(key, 60*60)

-            print("before printing command")

-

-            command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,

-                                                                                                                    "other")

-            print("command is:", command)

-            print("after printing command")

-

-            os.system(command)

-

-            #t_stats, p_values = lmm.run(key)

-            #lmm.run(key)

-            

-            json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)

-            results = json.loads(json_results[1])

-            p_values = [float(result) for result in results['p_values']]

-            print("p_values:", p_values)

-            #p_values = self.trim_results(p_values)

-            t_stats = results['t_stats']

-            

-            #t_stats, p_values = lmm.run(

-            #    pheno_vector,

-            #    genotype_matrix,

-            #    restricted_max_likelihood=True,

-            #    refit=False,

-            #    temp_data=tempdata

-            #)

-            #print("p_values:", p_values)

-

-        self.dataset.group.markers.add_pvalues(p_values)

-        

-        #self.get_lod_score_cutoff()

-        

-        return self.dataset.group.markers.markers

-

-    def trim_results(self, p_values):

-        print("len_p_values:", len(p_values))

-        if len(p_values) > 500:

-            p_values.sort(reverse=True)

-            trimmed_values = p_values[:500]

-        

-        return trimmed_values

-

-    #def gen_human_results(self, pheno_vector, tempdata):

-    def gen_human_results(self, pheno_vector, key, temp_uuid):

-        file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)

-

-        plink_input = input.plink(file_base, type='b')

-        input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz")

-

-        pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))

-        covariate_matrix = np.ones((pheno_vector.shape[0],1))

-        kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ")

-        kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs)))

-

-        print("Before creating params")

-

-        params = dict(pheno_vector = pheno_vector.tolist(),

-                    covariate_matrix = covariate_matrix.tolist(),

-                    input_file_name = input_file_name,

-                    kinship_matrix = kinship_matrix.tolist(),

-                    refit = False,

-                    temp_uuid = temp_uuid,

-                        

-                    # meta data

-                    timestamp = datetime.datetime.now().isoformat(),

-                    )

-        

-        print("After creating params")

-        

-        json_params = json.dumps(params)

-        Redis.set(key, json_params)

-        Redis.expire(key, 60*60)

-

-        print("Before creating the command")

-

-        command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,

-                                                                                                                "human")

-        

-        print("command is:", command)

-        

-        os.system(command)

-        

-        json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)

-        results = json.loads(json_results[1])

-        t_stats = results['t_stats']

-        p_values = results['p_values']

-        

-

-        #p_values, t_stats = lmm.run_human(key)

-

-        #p_values, t_stats = lmm.run_human(

-        #        pheno_vector,

-        #        covariate_matrix,

-        #        input_file_name,

-        #        kinship_matrix,

-        #        loading_progress=tempdata

-        #    )

-

-        return p_values, t_stats

-

    def get_lod_score_cutoff(self):
        """Pick a LOD cutoff index based on how many strong markers exist.

        Counts markers with a LOD score above 1; returns 1 when more than
        1000 such markers exist, otherwise 0.  The caller presumably uses
        this as an index/flag when filtering markers -- TODO confirm.
        """
        print("INSIDE GET LOD CUTOFF")
        high_qtl_count = 0
        for marker in self.dataset.group.markers.markers:
            if marker['lod_score'] > 1:
                high_qtl_count += 1

        # Many strong markers -> use the stricter cutoff.
        if high_qtl_count > 1000:
            return 1
        else:
            return 0

-

    def identify_empty_samples(self):
        """Return the indices of samples whose submitted value is the
        missing-value marker "x"."""
        no_val_samples = []
        for sample_count, val in enumerate(self.vals):
            if val == "x":
                no_val_samples.append(sample_count)
        return no_val_samples

-        

    def trim_genotypes(self, genotype_data, no_value_samples):
        """Drop genotype entries for samples with no trait value and coerce
        the remaining genotype codes to floats.

        genotype_data -- list of per-marker genotype sequences
        no_value_samples -- sample indices to skip (from identify_empty_samples)

        Returns a list of per-marker float lists; any genotype code that
        cannot be parsed as a float becomes np.nan (treated as missing).
        """
        trimmed_genotype_data = []
        for marker in genotype_data:
            new_genotypes = []
            for item_count, genotype in enumerate(marker):
                # Skip genotypes belonging to samples that have no value.
                if item_count in no_value_samples:
                    continue
                try:
                    genotype = float(genotype)
                except ValueError:
                    # Unparseable genotype code -> missing value.
                    genotype = np.nan
                    pass
                new_genotypes.append(genotype)
            trimmed_genotype_data.append(new_genotypes)
        return trimmed_genotype_data

-    

def create_snp_iterator_file(group):
    """Pre-serialize a group's PLINK SNP data to a gzipped pickle.

    Reads the binary PLINK files for *group* from PYLMM_PATH, materializes
    the SNP iterator, and writes {plink_input, numSNPs} to
    SNP_PATH/<group>.snps.gz for later consumption by the pyLMM worker.
    """
    plink_file_base = os.path.join(webqtlConfig.PYLMM_PATH, group)
    plink_input = input.plink(plink_file_base, type='b')

    # Materializing the iterator loads every SNP into memory at once.
    data = dict(plink_input = list(plink_input),
                numSNPs = plink_input.numSNPs)

    snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps.gz")

    # NOTE(review): gzip (and pickle here) are only imported inside the
    # __main__ guard below; calling this function from another module would
    # raise NameError -- confirm and hoist the imports if needed.
    with gzip.open(snp_file_base, "wb") as fh:
        pickle.dump(data, fh, pickle.HIGHEST_PROTOCOL)

-

-#if __name__ == '__main__':

-#    import cPickle as pickle

-#    import gzip

-#    create_snp_iterator_file("HLC")

-    

# Script entry point: build the compressed SNP pickle for the "HLC" group.
if __name__ == '__main__':
    import cPickle as pickle
    import gzip
    create_snp_iterator_file("HLC")

+from __future__ import absolute_import, print_function, division
+
+from base.trait import GeneralTrait
+from base import data_set  #import create_dataset
+
+from pprint import pformat as pf
+
+import string
+import math
+import sys
+import datetime
+import os
+import collections
+import uuid
+
+import rpy2.robjects as robjects
+import numpy as np
+from scipy import linalg
+
+import cPickle as pickle
+
+import simplejson as json
+
+from redis import Redis
+Redis = Redis()
+
+from flask import Flask, g
+
+from base.trait import GeneralTrait
+from base import data_set
+from base import species
+from base import webqtlConfig
+from utility import webqtlUtil
+from wqflask.my_pylmm.data import prep_data
+from wqflask.my_pylmm.pyLMM import lmm
+from wqflask.my_pylmm.pyLMM import input
+from utility import helper_functions
+from utility import Plot, Bunch
+from utility import temp_data
+
+from utility.benchmark import Bench
+
+
+class MarkerRegression(object):
+
+    def __init__(self, start_vars, temp_uuid):
+
+        helper_functions.get_species_dataset_trait(self, start_vars)
+
+        #tempdata = temp_data.TempData(temp_uuid)
+        
+        self.json_data = {}
+        self.json_data['lodnames'] = ['lod.hk']
+        
+        self.samples = [] # Want only ones with values
+        self.vals = []
+
+        for sample in self.dataset.group.samplelist:
+            value = start_vars['value:' + sample]
+            self.samples.append(str(sample))
+            self.vals.append(value)
+ 
+        self.mapping_method = start_vars['method']
+        if start_vars['manhattan_plot'] == "true":
+            self.manhattan_plot = True
+        else:
+            self.manhattan_plot = False
+        self.maf = start_vars['maf'] # Minor allele frequency
+        self.suggestive = ""
+        self.significant = ""
+        #print("self.maf:", self.maf)
+ 
+        self.dataset.group.get_markers()
+        if self.mapping_method == "gemma":
+            qtl_results = self.run_gemma()
+        elif self.mapping_method == "rqtl_plink":
+            qtl_results = self.run_rqtl_plink()
+        elif self.mapping_method == "rqtl_geno":
+            self.num_perm = start_vars['num_perm']
+            self.control = start_vars['control_marker']
+
+            print("doing rqtl_geno")
+            qtl_results = self.run_rqtl_geno()
+            print("qtl_results:", qtl_results)
+        elif self.mapping_method == "plink":
+            qtl_results = self.run_plink()
+            #print("qtl_results:", pf(qtl_results))
+        elif self.mapping_method == "pylmm":
+            print("RUNNING PYLMM")
+            self.num_perm = start_vars['num_perm']
+            if int(self.num_perm) > 0:
+	         self.run_permutations(str(temp_uuid))
+            qtl_results = self.gen_data(str(temp_uuid))
+        else:
+            print("RUNNING NOTHING")
+            
+        self.lod_cutoff = 2    
+        self.filtered_markers = []
+        highest_chr = 1 #This is needed in order to convert the highest chr to X/Y
+        for marker in qtl_results:
+            if marker['chr'] > 0 or marker['chr'] == "X" or marker['chr'] == "X/Y":
+                if marker['chr'] > highest_chr or marker['chr'] == "X" or marker['chr'] == "X/Y":
+                    highest_chr = marker['chr']
+                if 'lod_score' in marker:
+                    self.filtered_markers.append(marker)
+
+        self.json_data['chr'] = []
+        self.json_data['pos'] = []
+        self.json_data['lod.hk'] = []
+        self.json_data['markernames'] = []
+
+        self.json_data['suggestive'] = self.suggestive
+        self.json_data['significant'] = self.significant
+
+        #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
+        self.qtl_results = []
+        for qtl in self.filtered_markers:
+            print("lod score is:", qtl['lod_score'])
+            if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
+                print("changing to X")
+                self.json_data['chr'].append("X")
+            else:
+                self.json_data['chr'].append(str(qtl['chr']))
+            self.json_data['pos'].append(qtl['Mb'])
+            self.json_data['lod.hk'].append(str(qtl['lod_score']))
+            self.json_data['markernames'].append(qtl['name'])
+
+        #Get chromosome lengths for drawing the interval map plot
+        chromosome_mb_lengths = {}
+        self.json_data['chrnames'] = []
+        for key in self.species.chromosomes.chromosomes.keys():
+            self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length])
+            chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
+        
+        print("json_data:", self.json_data)
+        
+
+        self.js_data = dict(
+            json_data = self.json_data,
+            this_trait = self.this_trait.name,
+            data_set = self.dataset.name,
+            maf = self.maf,
+            manhattan_plot = self.manhattan_plot,
+            chromosomes = chromosome_mb_lengths,
+            qtl_results = self.filtered_markers,
+        )
+
+    def run_gemma(self):
+        """Generates p-values for each marker using GEMMA"""
+        
+        #filename = webqtlUtil.genRandStr("{}_{}_".format(self.dataset.group.name, self.this_trait.name))
+        self.gen_pheno_txt_file()
+
+        os.chdir("/home/zas1024/gene/web/gemma")
+
+        gemma_command = './gemma -bfile %s -k output_%s.cXX.txt -lmm 1 -o %s_output' % (
+                                                                                                 self.dataset.group.name,
+                                                                                                 self.dataset.group.name,
+                                                                                                 self.dataset.group.name)
+        print("gemma_command:" + gemma_command)
+        
+        os.system(gemma_command)
+        
+        included_markers, p_values = self.parse_gemma_output()
+        
+        self.dataset.group.get_specified_markers(markers = included_markers)
+        
+        #for marker in self.dataset.group.markers.markers:
+        #    if marker['name'] not in included_markers:
+        #        print("marker:", marker)
+        #        self.dataset.group.markers.markers.remove(marker)
+        #        #del self.dataset.group.markers.markers[marker]
+        
+        self.dataset.group.markers.add_pvalues(p_values)
+
+        return self.dataset.group.markers.markers
+
+
+    def parse_gemma_output(self):
+        included_markers = []
+        p_values = []
+        with open("/home/zas1024/gene/web/gemma/output/{}_output.assoc.txt".format(self.dataset.group.name)) as output_file:
+            for line in output_file:
+                if line.startswith("chr"):
+                    continue
+                else:
+                    included_markers.append(line.split("\t")[1])
+                    p_values.append(float(line.split("\t")[10]))
+                    #p_values[line.split("\t")[1]] = float(line.split("\t")[10])
+        print("p_values: ", p_values)
+        return included_markers, p_values
+
+    def gen_pheno_txt_file(self):
+        """Generates phenotype file for GEMMA"""
+        
+        #with open("/home/zas1024/gene/web/gemma/tmp_pheno/{}.txt".format(filename), "w") as outfile:
+        #    for sample, i in enumerate(self.samples):
+        #        print("sample:" + str(i))
+        #        print("self.vals[i]:" + str(self.vals[sample]))
+        #        outfile.write(str(i) + "\t" + str(self.vals[sample]) + "\n")
+                
+        with open("/home/zas1024/gene/web/gemma/{}.fam".format(self.dataset.group.name), "w") as outfile:
+            for i, sample in enumerate(self.samples):
+                outfile.write(str(sample) + " " + str(sample) + " 0 0 0 " + str(self.vals[i]) + "\n")
+
+    #def gen_plink_for_gemma(self, filename):
+    #    
+    #    make_bed = "/home/zas1024/plink/plink --file /home/zas1024/plink/%s --noweb --no-fid --no-parents --no-sex --no-pheno --pheno %s%s.txt --out %s%s --make-bed" % (webqtlConfig.HTMLPATH,
+    #                                                                                                                                             webqtlConfig.HTMLPATH,
+    #                                                                                                                                             self.dataset.group.name,
+    #                                                                                                                                             webqtlConfig.TMPDIR,
+    #                                                                                                                                             filename,
+    #                                                                                                                                             webqtlConfig.TMPDIR,
+    #                                                                                                                                             filename)
+    #
+    #
+    
+    def run_rqtl_plink(self):
+        os.chdir("/home/zas1024/plink")
+        
+        output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name))
+        
+        self.gen_pheno_txt_file_plink(pheno_filename = output_filename)
+        
+        rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, webqtlConfig.TMPDIR, plink_output_filename, self.this_trait.name, self.maf, webqtlConfig.TMPDIR, plink_output_filename)
+        
+        os.system(rqtl_command)
+        
+        count, p_values = self.parse_rqtl_output(plink_output_filename)
+    
+    def run_rqtl_geno(self):
+        robjects.packages.importr("qtl")
+        robjects.r('the_cross <- read.cross(format="csvr", dir="/home/zas1024/PLINK2RQTL/test", file="BXD.csvr")')
+        if self.manhattan_plot:
+            robjects.r('the_cross <- calc.genoprob(the_cross)')
+        else:
+            robjects.r('the_cross <- calc.genoprob(the_cross, step=1, stepwidth="max")')
+        pheno_as_string = "c("
+        #for i, val in enumerate(self.vals):
+        #    if val == "x":
+        #        new_val == "NULL"
+        #    else:
+        #        new_val = val
+        #    if i < (len(self.vals) - 1):
+        #        pheno_as_string += str(new_val) + ","
+        #    else: pheno_as_string += str(new_val)
+        null_pos = []
+        for i, val in enumerate(self.vals):
+            if val == "x":
+                null_pos.append(i)
+                if i < (len(self.vals) - 1):
+                    pheno_as_string +=  "NA,"
+                else:
+                    pheno_as_string += "NA"
+            else:
+                if i < (len(self.vals) - 1):
+                    pheno_as_string += str(val) + ","
+                else:
+                    pheno_as_string += str(val)
+            
+        pheno_as_string += ")"
+        
+        robjects.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')')
+        
+        print("self.control:", self.control)
+        
+        if self.control != "":
+            control_markers = self.control.split(",")
+            control_string = ""
+            for i, control in enumerate(control_markers):
+                control_trait = GeneralTrait(name=str(control), dataset_name=str(self.dataset.group.name + "Geno"))
+                control_vals = []
+                for sample in self.dataset.group.samplelist:
+                    if sample in control_trait.data:
+                        control_vals.append(control_trait.data[sample].value)
+                    else:
+                        control_vals.append("x")
+                print("control_vals:", control_vals)
+                control_as_string = "c("
+                for j, val2 in enumerate(control_vals):
+                    if val2 == "x":
+                        if j < (len(control_vals) - 1):
+                            control_as_string +=  "NA,"
+                        else:
+                            control_as_string += "NA"
+                    else:
+                        if j < (len(control_vals) - 1):
+                            control_as_string += str(val2) + ","
+                        else:
+                            control_as_string += str(val2)
+                    #if i < (len(control_vals) - 1):
+                    #    control_as_string += str(new_val2) + ","
+                    #else:
+                    #    control_as_string += str(new_val2)
+                control_as_string += ")"
+                print("control_as_string:", control_as_string)
+                if i < (len(control_markers)-1):
+                    control_string += control_as_string + ","
+                else:
+                    control_string += control_as_string
+                    
+            robjects.r('covariates <- cbind( '+ control_string +')')
+            
+            r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, n.perm='+self.num_perm+', addcovar=covariates, intcovar=covariates[,'+ str(len(control_markers)) +'])'
+            print("r_string:", r_string)
+            
+            if int(self.num_perm) > 0:
+                thresholds = robjects.r(r_string)
+                self.suggestive, self.significant = self.process_rqtl_perm_results(results)
+                r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, addcovar=covariates, intcovar=covariates[,'+ str(len(control_markers)) +'])'
+                
+            #r_string = 'scanone(the_cross, pheno.col='+pheno_as_string+', addcovar='+control_as_string+')'
+            
+        else:
+        #r_string = 'scanone(the_cross, pheno.col='+pheno_as_string+', n.perm='+self.num_perm+')'
+            r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16, n.perm='+self.num_perm+')'
+            if self.num_perm.isdigit() and int(self.num_perm) > 0:
+                results = robjects.r(r_string)
+                self.suggestive, self.significant = self.process_rqtl_perm_results(results)
+                r_string = 'scanone(the_cross, pheno.col="the_pheno", n.cluster=16)'
+            
+        print("r_string:", r_string)
+        result_data_frame = robjects.r(r_string)
+        #print("results:", result_data_frame)
+
+        qtl_results = self.process_rqtl_results(result_data_frame)
+
+        return qtl_results
+    
+    def process_rqtl_perm_results(self, results):
+
+        perm_vals = []
+        for line in str(results).split("\n")[1:(int(self.num_perm)+1)]:
+            print("line:", line.split())
+            perm_vals.append(float(line.split()[1]))
+        
+        self.suggestive = np.percentile(np.array(perm_vals), 67)
+        self.significant = np.percentile(np.array(perm_vals), 95)
+        
+        return self.suggestive, self.significant
+            
+    
+    def process_rqtl_results(self, result):
+        qtl_results = []
+        
+        output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+        print("output", output)
+        
+        
+        for i, line in enumerate(result.iter_row()):
+            marker = {}
+            marker['name'] = result.rownames[i]
+            marker['chr'] = output[i][0]
+            marker['Mb'] = output[i][1]
+            marker['lod_score'] = output[i][2]
+            
+            qtl_results.append(marker)
+            
+        return qtl_results
+
+    def run_plink(self):
+    
+        os.chdir("/home/zas1024/plink")
+        
+        plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name))
+        
+        self.gen_pheno_txt_file_plink(pheno_filename = plink_output_filename)
+        
+        plink_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, webqtlConfig.TMPDIR, plink_output_filename, self.this_trait.name, self.maf, webqtlConfig.TMPDIR, plink_output_filename)
+        
+        os.system(plink_command)
+
+        count, p_values = self.parse_plink_output(plink_output_filename)
+        #gemma_command = './gemma -bfile %s -k output_%s.cXX.txt -lmm 1 -o %s_output' % (
+        #                                                                                         self.dataset.group.name,
+        #                                                                                         self.dataset.group.name,
+        #                                                                                         self.dataset.group.name)
+        #print("gemma_command:" + gemma_command)
+        #
+        #os.system(gemma_command)
+        #
+        #included_markers, p_values = self.parse_gemma_output()
+        #
+        #self.dataset.group.get_specified_markers(markers = included_markers)
+        
+        #for marker in self.dataset.group.markers.markers:
+        #    if marker['name'] not in included_markers:
+        #        print("marker:", marker)
+        #        self.dataset.group.markers.markers.remove(marker)
+        #        #del self.dataset.group.markers.markers[marker]
+        
+        print("p_values:", pf(p_values))
+        
+        self.dataset.group.markers.add_pvalues(p_values)
+
+        return self.dataset.group.markers.markers
+    
+    
+    def gen_pheno_txt_file_plink(self, pheno_filename = ''):
+        ped_sample_list = self.get_samples_from_ped_file()	
+        output_file = open("%s%s.txt" % (webqtlConfig.TMPDIR, pheno_filename), "wb")
+        header = 'FID\tIID\t%s\n' % self.this_trait.name
+        output_file.write(header)
+    
+        new_value_list = []
+        
+        #if valueDict does not include some strain, value will be set to -9999 as missing value
+        for i, sample in enumerate(ped_sample_list):
+            try:
+                value = self.vals[i]
+                value = str(value).replace('value=','')
+                value = value.strip()
+            except:
+                value = -9999
+    
+            new_value_list.append(value)
+            
+            
+        new_line = ''
+        for i, sample in enumerate(ped_sample_list):
+            j = i+1
+            value = new_value_list[i]
+            new_line += '%s\t%s\t%s\n'%(sample, sample, value)
+            
+            if j%1000 == 0:
+                output_file.write(newLine)
+                new_line = ''
+        
+        if new_line:
+            output_file.write(new_line)
+            
+        output_file.close()
+        
+    def gen_pheno_txt_file_rqtl(self, pheno_filename = ''):
+        ped_sample_list = self.get_samples_from_ped_file()	
+        output_file = open("%s%s.txt" % (webqtlConfig.TMPDIR, pheno_filename), "wb")
+        header = 'FID\tIID\t%s\n' % self.this_trait.name
+        output_file.write(header)
+    
+        new_value_list = []
+        
+        #if valueDict does not include some strain, value will be set to -9999 as missing value
+        for i, sample in enumerate(ped_sample_list):
+            try:
+                value = self.vals[i]
+                value = str(value).replace('value=','')
+                value = value.strip()
+            except:
+                value = -9999
+    
+            new_value_list.append(value)
+            
+            
+        new_line = ''
+        for i, sample in enumerate(ped_sample_list):
+            j = i+1
+            value = new_value_list[i]
+            new_line += '%s\t%s\t%s\n'%(sample, sample, value)
+            
+            if j%1000 == 0:
+                output_file.write(newLine)
+                new_line = ''
+        
+        if new_line:
+            output_file.write(new_line)
+            
+        output_file.close()
+    
+    # get strain name from ped file in order
+    def get_samples_from_ped_file(self):
+        
+        os.chdir("/home/zas1024/plink")
+        
+        ped_file= open("{}.ped".format(self.dataset.group.name),"r")
+        line = ped_file.readline()
+        sample_list=[]
+        
+        while line:
+            lineList = string.split(string.strip(line), '\t')
+            lineList = map(string.strip, lineList)
+            
+            sample_name = lineList[0]
+            sample_list.append(sample_name)
+            
+            line = ped_file.readline()
+        
+        return sample_list
+    
    def parse_plink_output(self, output_filename):
        """Parse PLINK's .qassoc association output.

        output_filename -- basename of the .qassoc file in TMPDIR

        Returns (count, p_value_dict): the number of records kept below the
        p-value threshold and a dict mapping SNP name -> p-value.  Records
        whose chromosome is unknown or whose p-value is 'NA' are skipped.
        """
        plink_results={}

        # Only records with p below this value are retained.
        threshold_p_value = 0.01

        # NOTE(review): result_fp is never closed -- relies on GC; a
        # with-block would be safer.
        result_fp = open("%s%s.qassoc"% (webqtlConfig.TMPDIR, output_filename), "rb")

        header_line = result_fp.readline()# read header line
        line = result_fp.readline()

        value_list = [] # initialize value list, this list will include snp, bp and pvalue info
        p_value_dict = {}
        count = 0

        while line:
            #convert line from str to list
            line_list = self.build_line_list(line=line)

            # only keep the records whose chromosome name is in db
            if self.species.chromosomes.chromosomes.has_key(int(line_list[0])) and line_list[-1] and line_list[-1].strip()!='NA':

                chr_name = self.species.chromosomes.chromosomes[int(line_list[0])]
                snp = line_list[1]
                BP = line_list[2]
                p_value = float(line_list[-1])
                # pvalue range is [0,1]
                if threshold_p_value >= 0 and threshold_p_value <= 1:
                    if p_value < threshold_p_value:
                        p_value_dict[snp] = p_value

                # Group passing records per chromosome in plink_results.
                if plink_results.has_key(chr_name):
                    value_list = plink_results[chr_name]

                    # pvalue range is [0,1]
                    if threshold_p_value >=0 and threshold_p_value <= 1:
                        if p_value < threshold_p_value:
                            value_list.append((snp, BP, p_value))
                            count += 1

                    plink_results[chr_name] = value_list
                    value_list = []
                else:
                    if threshold_p_value >= 0 and threshold_p_value <= 1:
                        if p_value < threshold_p_value:
                            value_list.append((snp, BP, p_value))
                            count += 1

                    if value_list:
                        plink_results[chr_name] = value_list

                    value_list=[]

                line = result_fp.readline()
            else:
                line = result_fp.readline()

        return count, p_value_dict
+    
+    ######################################################
+    # input: line: str,one line read from file
+    # function: convert line from str to list; 
+    # output: lineList list
+    #######################################################
+    def build_line_list(self, line=None):
+        
+        line_list = string.split(string.strip(line),' ')# irregular number of whitespaces between columns
+        line_list = [item for item in line_list if item <>'']
+        line_list = map(string.strip, line_list)
+    
+        return line_list
+    
+    
    def run_permutations(self, temp_uuid):
        """Runs permutations and gets significant and suggestive LOD scores.

        For each of self.num_perm permutations the phenotype vector is
        shuffled, pyLMM is run on the shuffled data via Redis + a subprocess,
        and the permutation's best LOD (-log10 of its lowest p-value) is
        recorded.  The 67th/95th percentiles of those top scores become
        self.suggestive / self.significant.
        """
        top_lod_scores = []

        print("self.num_perm:", self.num_perm)

        for permutation in range(int(self.num_perm)):

            # "x" marks a missing value; np.nan is truthy, so the and/or
            # chain maps missing -> nan and everything else -> float(val).
            pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals])
            np.random.shuffle(pheno_vector)

            key = "pylmm:input:" + temp_uuid

            if self.dataset.group.species == "human":
                # NOTE(review): the human branch does not append to
                # top_lod_scores, so human permutations never contribute to
                # the thresholds -- confirm whether that is intended.
                p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid)
            else:
                genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]

                # Drop genotype columns for valueless samples, then
                # transpose to (samples x markers) for pyLMM.
                no_val_samples = self.identify_empty_samples()
                trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)

                genotype_matrix = np.array(trimmed_genotype_data).T

                params = dict(pheno_vector = pheno_vector.tolist(),
                            genotype_matrix = genotype_matrix.tolist(),
                            restricted_max_likelihood = True,
                            refit = False,
                            temp_uuid = temp_uuid,

                            # meta data
                            timestamp = datetime.datetime.now().isoformat(),
                            )

                # Hand inputs to the pyLMM worker via Redis (1 hour expiry).
                json_params = json.dumps(params)
                Redis.set(key, json_params)
                Redis.expire(key, 60*60)

                command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
                                                                                                                        "other")

                os.system(command)

                # Block (up to 45 minutes) until the worker pushes results.
                json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
                results = json.loads(json_results[1])
                p_values = [float(result) for result in results['p_values']]

                # The permutation's best (lowest) p-value -> its top LOD.
                lowest_p_value = 1
                for p_value in p_values:
                    if p_value < lowest_p_value:
                        lowest_p_value = p_value

                print("lowest_p_value:", lowest_p_value)
                top_lod_scores.append(-math.log10(lowest_p_value))

        print("top_lod_scores:", top_lod_scores)

        self.suggestive = np.percentile(top_lod_scores, 67)
        self.significant = np.percentile(top_lod_scores, 95)
+
    def gen_data(self, temp_uuid):
        """Generates p-values for each marker.

        Serializes the phenotype/genotype inputs to Redis under a key derived
        from temp_uuid, shells out to the pyLMM script, blocks until the
        worker pushes its results onto a Redis list, then attaches the
        p-values to the group's markers and returns them.
        """
        # "x" marks a missing value; np.nan is truthy, so the and/or chain
        # maps missing -> nan and everything else -> float(val).
        pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals])

        key = "pylmm:input:" + temp_uuid
        print("key is:", pf(key))

        if self.dataset.group.species == "human":
            p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid)
        else:
            print("NOW CWD IS:", os.getcwd())
            genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]

            # Drop genotype columns for valueless samples, then transpose to
            # (samples x markers) for pyLMM.
            no_val_samples = self.identify_empty_samples()
            trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)

            genotype_matrix = np.array(trimmed_genotype_data).T

            print("genotype_matrix:", str(genotype_matrix.tolist()))
            print("pheno_vector:", str(pheno_vector.tolist()))

            params = dict(pheno_vector = pheno_vector.tolist(),
                        genotype_matrix = genotype_matrix.tolist(),
                        restricted_max_likelihood = True,
                        refit = False,
                        temp_uuid = temp_uuid,

                        # meta data
                        timestamp = datetime.datetime.now().isoformat(),
                        )

            # Hand the inputs to the pyLMM worker via Redis; expire after an
            # hour so abandoned runs do not accumulate.
            json_params = json.dumps(params)
            Redis.set(key, json_params)
            Redis.expire(key, 60*60)
            print("before printing command")

            command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
                                                                                                                    "other")
            print("command is:", command)
            print("after printing command")

            os.system(command)

            # Block (up to 45 minutes) until the worker pushes its results.
            json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
            results = json.loads(json_results[1])
            p_values = [float(result) for result in results['p_values']]
            print("p_values:", p_values)
            t_stats = results['t_stats']

        self.dataset.group.markers.add_pvalues(p_values)

        return self.dataset.group.markers.markers
+
+    def trim_results(self, p_values):
+        print("len_p_values:", len(p_values))
+        if len(p_values) > 500:
+            p_values.sort(reverse=True)
+            trimmed_values = p_values[:500]
+        
+        return trimmed_values
+
+    #def gen_human_results(self, pheno_vector, tempdata):
    def gen_human_results(self, pheno_vector, key, temp_uuid):
        """Run pyLMM association mapping for human data via an external worker.

        Serializes the phenotype vector, a constant covariate column, the
        kinship matrix and the SNP-file path into Redis under ``key``, shells
        out to the lmm.py script with ``--species human``, then blocks on a
        Redis list for the worker's JSON results.

        pheno_vector -- 1-D numpy array of phenotype values
        key -- Redis key under which the worker reads its JSON input
        temp_uuid -- id used to build the "pylmm:results:<uuid>" results list
        Returns (p_values, t_stats) as decoded from the worker's JSON payload.
        """
        file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)

        plink_input = input.plink(file_base, type='b')
        input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz")

        # Column vector phenotype; intercept-only covariate matrix.
        pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
        covariate_matrix = np.ones((pheno_vector.shape[0],1))
        # Kinship is stored as whitespace-separated text in <group>.kin and
        # reshaped to a square (n_indivs x n_indivs) matrix.
        kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ")
        kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs)))

        print("Before creating params")

        params = dict(pheno_vector = pheno_vector.tolist(),
                    covariate_matrix = covariate_matrix.tolist(),
                    input_file_name = input_file_name,
                    kinship_matrix = kinship_matrix.tolist(),
                    refit = False,
                    temp_uuid = temp_uuid,

                    # meta data
                    timestamp = datetime.datetime.now().isoformat(),
                    )

        print("After creating params")

        json_params = json.dumps(params)
        Redis.set(key, json_params)
        # Input payload expires after one hour so stale jobs don't linger.
        Redis.expire(key, 60*60)

        print("Before creating the command")

        # NOTE(review): hard-coded user-specific path to lmm.py — presumably
        # should come from configuration; confirm before deploying elsewhere.
        command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
                                                                                                                "human")

        print("command is:", command)

        os.system(command)

        # Block for up to 45 minutes waiting on the worker's result list;
        # blpop returns a (list_name, value) pair, hence the [1] below.
        json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
        results = json.loads(json_results[1])
        t_stats = results['t_stats']
        p_values = results['p_values']


        #p_values, t_stats = lmm.run_human(key)

        #p_values, t_stats = lmm.run_human(
        #        pheno_vector,
        #        covariate_matrix,
        #        input_file_name,
        #        kinship_matrix,
        #        loading_progress=tempdata
        #    )

        return p_values, t_stats
+
+    def get_lod_score_cutoff(self):
+        print("INSIDE GET LOD CUTOFF")
+        high_qtl_count = 0
+        for marker in self.dataset.group.markers.markers:
+            if marker['lod_score'] > 1:
+                high_qtl_count += 1
+
+        if high_qtl_count > 1000:
+            return 1
+        else:
+            return 0
+
+    def identify_empty_samples(self):
+        no_val_samples = []
+        for sample_count, val in enumerate(self.vals):
+            if val == "x":
+                no_val_samples.append(sample_count)
+        return no_val_samples
+        
+    def trim_genotypes(self, genotype_data, no_value_samples):
+        trimmed_genotype_data = []
+        for marker in genotype_data:
+            new_genotypes = []
+            for item_count, genotype in enumerate(marker):
+                if item_count in no_value_samples:
+                    continue
+                try:
+                    genotype = float(genotype)
+                except ValueError:
+                    genotype = np.nan
+                    pass
+                new_genotypes.append(genotype)
+            trimmed_genotype_data.append(new_genotypes)
+        return trimmed_genotype_data
+    
def create_snp_iterator_file(group):
    """Serialize a group's plink SNP data into a gzipped pickle for later iteration.

    group -- dataset group name; used to locate the plink files under
             webqtlConfig.PYLMM_PATH and to name <group>.snps.gz under
             webqtlConfig.SNP_PATH.

    Fix vs. original: ``gzip`` was only imported inside the ``__main__``
    guard, so calling this function from another module raised NameError;
    it is now imported locally.  Dead commented-out code removed.
    """
    import gzip

    plink_file_base = os.path.join(webqtlConfig.PYLMM_PATH, group)
    plink_input = input.plink(plink_file_base, type='b')

    # Materialize every SNP plus the total count so consumers can report
    # progress without re-reading the plink files.
    data = dict(plink_input = list(plink_input),
                numSNPs = plink_input.numSNPs)

    snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps.gz")

    with gzip.open(snp_file_base, "wb") as fh:
        pickle.dump(data, fh, pickle.HIGHEST_PROTOCOL)
+
# Command-line entry point: regenerate the cached SNP iterator file.
if __name__ == '__main__':
    import cPickle as pickle
    import gzip
    create_snp_iterator_file("HLC")
diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
index b0eea3b6..4629296b 100755
--- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
+++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
@@ -142,6 +142,7 @@ $ ->
         $("#progress_bar_container").modal()
         url = "/marker_regression"
         $('input[name=method]').val("pylmm")
+        $('input[name=num_perm]').val($('input[name=num_perm_pylmm]').val())
         $('input[name=manhattan_plot]').val($('input[name=manhattan_plot_pylmm]:checked').val())
         form_data = $('#trait_data_form').serialize()
         console.log("form_data is:", form_data)
diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html
index 3fd98df3..61fb98de 100755
--- a/wqflask/wqflask/templates/show_trait_mapping_tools.html
+++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html
@@ -148,6 +148,12 @@
                 </div>
                 
                 <div class="tab-pane" id="pylmm">
+                    <div class="control-group mapping_method_fields">
+                        <label for="mapping_permutations" class="control-label">Permutations (n)</label>
+                        <div class="controls">
+                            <input name="num_perm_pylmm" value="2000" type="text" />
+                        </div>
+                    </div>
                     <div class="control-group" id="display_all_div">
                         <label class="control-label">Manhattan Plot</label>
                         <div class="controls">