aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask')
-rw-r--r--wqflask/wqflask/marker_regression/marker_regression.py160
-rw-r--r--wqflask/wqflask/marker_regression/plink_mapping.py168
-rw-r--r--wqflask/wqflask/marker_regression/rqtl_mapping.py384
3 files changed, 363 insertions, 349 deletions
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 7ad5d25a..fbd9a816 100644
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -35,7 +35,7 @@ from utility import helper_functions
from utility import Plot, Bunch
from utility import temp_data
from utility.benchmark import Bench
-from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping
+from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping
from utility.tools import locate, locate_ignore_error, PYLMM_COMMAND, GEMMA_COMMAND, PLINK_COMMAND, TEMPDIR
from utility.external import shell
@@ -212,7 +212,8 @@ class MarkerRegression(object):
self.control_marker,
self.manhattan_plot)
elif self.mapping_method == "plink":
- results = self.run_plink()
+ results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf)
+ #results = self.run_plink()
elif self.mapping_method == "pylmm":
logger.debug("RUNNING PYLMM")
if self.num_perm > 0:
@@ -314,7 +315,7 @@ class MarkerRegression(object):
output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name))
- self.gen_pheno_txt_file_plink(pheno_filename = output_filename)
+ plink_mapping.gen_pheno_txt_file_plink(self.this_trait, self.dataset, self.vals, pheno_filename = output_filename)
rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename)
@@ -322,159 +323,6 @@ class MarkerRegression(object):
count, p_values = self.parse_rqtl_output(plink_output_filename)
- def run_plink(self):
- plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name))
-
- self.gen_pheno_txt_file_plink(pheno_filename = plink_output_filename)
-
- plink_command = PLINK_COMMAND + ' --noweb --ped %s/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %s/%s.map --pheno %s%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (PLINK_PATH, self.dataset.group.name, PLINK_PATH, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename)
- logger.debug("plink_command:", plink_command)
-
- os.system(plink_command)
-
- count, p_values = self.parse_plink_output(plink_output_filename)
-
- #for marker in self.dataset.group.markers.markers:
- # if marker['name'] not in included_markers:
- # logger.debug("marker:", marker)
- # self.dataset.group.markers.markers.remove(marker)
- # #del self.dataset.group.markers.markers[marker]
-
- logger.debug("p_values:", pf(p_values))
-
- self.dataset.group.markers.add_pvalues(p_values)
-
- return self.dataset.group.markers.markers
-
-
- def gen_pheno_txt_file_plink(self, pheno_filename = ''):
- ped_sample_list = self.get_samples_from_ped_file()
- output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb")
- header = 'FID\tIID\t%s\n' % self.this_trait.name
- output_file.write(header)
-
- new_value_list = []
-
- #if valueDict does not include some strain, value will be set to -9999 as missing value
- for i, sample in enumerate(ped_sample_list):
- try:
- value = self.vals[i]
- value = str(value).replace('value=','')
- value = value.strip()
- except:
- value = -9999
-
- new_value_list.append(value)
-
-
- new_line = ''
- for i, sample in enumerate(ped_sample_list):
- j = i+1
- value = new_value_list[i]
- new_line += '%s\t%s\t%s\n'%(sample, sample, value)
-
- if j%1000 == 0:
- output_file.write(newLine)
- new_line = ''
-
- if new_line:
- output_file.write(new_line)
-
- output_file.close()
-
- # get strain name from ped file in order
- def get_samples_from_ped_file(self):
- ped_file= open("{}/{}.ped".format(PLINK_PATH, self.dataset.group.name),"r")
- line = ped_file.readline()
- sample_list=[]
-
- while line:
- lineList = string.split(string.strip(line), '\t')
- lineList = map(string.strip, lineList)
-
- sample_name = lineList[0]
- sample_list.append(sample_name)
-
- line = ped_file.readline()
-
- return sample_list
-
- def parse_plink_output(self, output_filename):
- plink_results={}
-
- threshold_p_value = 0.01
-
- result_fp = open("%s%s.qassoc"% (TMPDIR, output_filename), "rb")
-
- header_line = result_fp.readline()# read header line
- line = result_fp.readline()
-
- value_list = [] # initialize value list, this list will include snp, bp and pvalue info
- p_value_dict = {}
- count = 0
-
- while line:
- #convert line from str to list
- line_list = self.build_line_list(line=line)
-
- # only keep the records whose chromosome name is in db
- if self.species.chromosomes.chromosomes.has_key(int(line_list[0])) and line_list[-1] and line_list[-1].strip()!='NA':
-
- chr_name = self.species.chromosomes.chromosomes[int(line_list[0])]
- snp = line_list[1]
- BP = line_list[2]
- p_value = float(line_list[-1])
- if threshold_p_value >= 0 and threshold_p_value <= 1:
- if p_value < threshold_p_value:
- p_value_dict[snp] = float(p_value)
-
- if plink_results.has_key(chr_name):
- value_list = plink_results[chr_name]
-
- # pvalue range is [0,1]
- if threshold_p_value >=0 and threshold_p_value <= 1:
- if p_value < threshold_p_value:
- value_list.append((snp, BP, p_value))
- count += 1
-
- plink_results[chr_name] = value_list
- value_list = []
- else:
- if threshold_p_value >= 0 and threshold_p_value <= 1:
- if p_value < threshold_p_value:
- value_list.append((snp, BP, p_value))
- count += 1
-
- if value_list:
- plink_results[chr_name] = value_list
-
- value_list=[]
-
- line = result_fp.readline()
- else:
- line = result_fp.readline()
-
- #if p_value_list:
- # min_p_value = min(p_value_list)
- #else:
- # min_p_value = 0
-
- return count, p_value_dict
-
- ######################################################
- # input: line: str,one line read from file
- # function: convert line from str to list;
- # output: lineList list
- #######################################################
- def build_line_list(self, line=None):
-
- line_list = string.split(string.strip(line),' ')# irregular number of whitespaces between columns
- line_list = [item for item in line_list if item <>'']
- line_list = map(string.strip, line_list)
-
- return line_list
-
-
def run_permutations(self, temp_uuid):
"""Runs permutations and gets significant and suggestive LOD scores"""
diff --git a/wqflask/wqflask/marker_regression/plink_mapping.py b/wqflask/wqflask/marker_regression/plink_mapping.py
new file mode 100644
index 00000000..c80dc700
--- /dev/null
+++ b/wqflask/wqflask/marker_regression/plink_mapping.py
@@ -0,0 +1,168 @@
+import string
+import os
+
+from base.webqtlConfig import TMPDIR
+from utility import webqtlUtil
+from utility.tools import PLINK_COMMAND
+
+import utility.logger
+logger = utility.logger.getLogger(__name__ )
+
+def run_plink(this_trait, dataset, species, vals, maf):
+ plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(dataset.group.name, this_trait.name))
+
+ gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = plink_output_filename)
+
+ plink_command = PLINK_COMMAND + ' --noweb --ped %s/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %s/%s.map --pheno %s%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (PLINK_PATH,
+ dataset.group.name,
+ PLINK_PATH,
+ dataset.group.name,
+ TMPDIR,
+ plink_output_filename,
+ this_trait.name,
+ maf,
+ TMPDIR,
+ plink_output_filename)
+ logger.debug("plink_command:", plink_command)
+
+ os.system(plink_command)
+
+ count, p_values = parse_plink_output(plink_output_filename, species)
+
+ #for marker in self.dataset.group.markers.markers:
+ # if marker['name'] not in included_markers:
+ # logger.debug("marker:", marker)
+ # self.dataset.group.markers.markers.remove(marker)
+ # #del self.dataset.group.markers.markers[marker]
+
+ logger.debug("p_values:", pf(p_values))
+
+ dataset.group.markers.add_pvalues(p_values)
+
+ return dataset.group.markers.markers
+
+
+def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''):
+ ped_sample_list = get_samples_from_ped_file(dataset)
+ output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb")
+ header = 'FID\tIID\t%s\n' % this_trait.name
+ output_file.write(header)
+
+ new_value_list = []
+
+ #if valueDict does not include some strain, value will be set to -9999 as missing value
+ for i, sample in enumerate(ped_sample_list):
+ try:
+ value = vals[i]
+ value = str(value).replace('value=','')
+ value = value.strip()
+ except:
+ value = -9999
+
+ new_value_list.append(value)
+
+ new_line = ''
+ for i, sample in enumerate(ped_sample_list):
+ j = i+1
+ value = new_value_list[i]
+ new_line += '%s\t%s\t%s\n'%(sample, sample, value)
+
+ if j%1000 == 0:
+ output_file.write(newLine)
+ new_line = ''
+
+ if new_line:
+ output_file.write(new_line)
+
+ output_file.close()
+
+# get strain name from ped file in order
+def get_samples_from_ped_file(dataset):
+ ped_file= open("{}/{}.ped".format(PLINK_PATH, dataset.group.name),"r")
+ line = ped_file.readline()
+ sample_list=[]
+
+ while line:
+ lineList = string.split(string.strip(line), '\t')
+ lineList = map(string.strip, lineList)
+
+ sample_name = lineList[0]
+ sample_list.append(sample_name)
+
+ line = ped_file.readline()
+
+ return sample_list
+
+def parse_plink_output(output_filename, species):
+ plink_results={}
+
+ threshold_p_value = 0.01
+
+ result_fp = open("%s%s.qassoc"% (TMPDIR, output_filename), "rb")
+
+ header_line = result_fp.readline()# read header line
+ line = result_fp.readline()
+
+ value_list = [] # initialize value list, this list will include snp, bp and pvalue info
+ p_value_dict = {}
+ count = 0
+
+ while line:
+ #convert line from str to list
+ line_list = build_line_list(line=line)
+
+ # only keep the records whose chromosome name is in db
+ if species.chromosomes.chromosomes.has_key(int(line_list[0])) and line_list[-1] and line_list[-1].strip()!='NA':
+
+ chr_name = species.chromosomes.chromosomes[int(line_list[0])]
+ snp = line_list[1]
+ BP = line_list[2]
+ p_value = float(line_list[-1])
+ if threshold_p_value >= 0 and threshold_p_value <= 1:
+ if p_value < threshold_p_value:
+ p_value_dict[snp] = float(p_value)
+
+ if plink_results.has_key(chr_name):
+ value_list = plink_results[chr_name]
+
+ # pvalue range is [0,1]
+ if threshold_p_value >=0 and threshold_p_value <= 1:
+ if p_value < threshold_p_value:
+ value_list.append((snp, BP, p_value))
+ count += 1
+
+ plink_results[chr_name] = value_list
+ value_list = []
+ else:
+ if threshold_p_value >= 0 and threshold_p_value <= 1:
+ if p_value < threshold_p_value:
+ value_list.append((snp, BP, p_value))
+ count += 1
+
+ if value_list:
+ plink_results[chr_name] = value_list
+
+ value_list=[]
+
+ line = result_fp.readline()
+ else:
+ line = result_fp.readline()
+
+ #if p_value_list:
+ # min_p_value = min(p_value_list)
+ #else:
+ # min_p_value = 0
+
+ return count, p_value_dict
+
+######################################################
+# input: line: str,one line read from file
+# function: convert line from str to list;
+# output: lineList list
+#######################################################
+def build_line_list(line=None):
+ line_list = string.split(string.strip(line),' ')# irregular number of whitespaces between columns
+ line_list = [item for item in line_list if item <>'']
+ line_list = map(string.strip, line_list)
+
+ return line_list \ No newline at end of file
diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index 93bf717c..afd28563 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -1,193 +1,191 @@
-import rpy2.robjects as ro
-import numpy as np
-
-from base.webqtlConfig import TMPDIR
-from utility import webqtlUtil
-from utility.tools import locate, TEMPDIR
-
-def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan):
- geno_to_rqtl_function(dataset)
-
- ## Get pointers to some common R functions
- r_library = ro.r["library"] # Map the library function
- r_c = ro.r["c"] # Map the c function
- r_sum = ro.r["sum"] # Map the sum function
- plot = ro.r["plot"] # Map the plot function
- postscript = ro.r["postscript"] # Map the postscript function
- png = ro.r["png"] # Map the png function
- dev_off = ro.r["dev.off"] # Map the device off function
-
- print(r_library("qtl")) # Load R/qtl
-
- ## Get pointers to some R/qtl functions
- scanone = ro.r["scanone"] # Map the scanone function
- scantwo = ro.r["scantwo"] # Map the scantwo function
- calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function
- read_cross = ro.r["read.cross"] # Map the read.cross function
- write_cross = ro.r["write.cross"] # Map the write.cross function
- GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function
-
- crossname = dataset.group.name
- genofilelocation = locate(crossname + ".geno", "genotype")
- crossfilelocation = TMPDIR + crossname + ".cross"
-
- #print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation)
-
- cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available
-
- if manhattan_plot:
- cross_object = calc_genoprob(cross_object)
- else:
- cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")
-
- cross_object = add_phenotype(cross_object, sanitize_rqtl_phenotype(vals)) # Add the phenotype
-
- # for debug: write_cross(cross_object, "csvr", "test.csvr")
-
- # Scan for QTLs
- covar = create_covariates(control_marker, cross_object) # Create the additive covariate matrix
-
- if pair_scan:
- if do_control == "true": # If sum(covar) > 0 we have a covariate matrix
- print("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method, n_cluster = 16)
- else:
- print("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16)
-
- #print("Pair scan results:", result_data_frame)
-
- pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
- png(file=TEMPDIR+pair_scan_filename)
- plot(result_data_frame)
- dev_off()
-
- return process_pair_scan_results(result_data_frame)
- else:
- if do_control == "true":
- print("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method)
- else:
- print("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method)
-
- if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user)
- if do_control == "true":
- perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm = num_perm, model=model, method=method)
- else:
- perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method)
-
- perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface
- return perm_output, suggestive, significant, process_rqtl_results(result_data_frame)
- else:
- return process_rqtl_results(result_data_frame)
-
-def geno_to_rqtl_function(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
-
- ro.r("""
- trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
-
- getGenoCode <- function(header, name = 'unk'){
- mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
- return(trim(strsplit(header[mat],':')[[1]][2]))
- }
-
- GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
- header = readLines(genotypes, 40) # Assume a geno header is not longer than 40 lines
- toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers
-
- genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes
- type <- getGenoCode(header, 'type')
- genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
- cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
- if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one
- if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males
- outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype
- c('sex', '', '', sex), # Sex phenotype for the mice
- cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes
- write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file
- require(qtl)
- cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross
- if(type == 'riset') cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl
- return(cross)
- }
- """ % (dataset.group.name + ".geno"))
-
-def add_phenotype(cross, pheno_as_string):
- ro.globalenv["the_cross"] = cross
- ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')')
- return ro.r["the_cross"]
-
-def create_covariates(control_marker, cross):
- ro.globalenv["the_cross"] = cross
- ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix
- userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user
- covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS)
- #print("Marker names of selected covariates:", covariate_names)
- ro.r('covnames <- c(' + covariate_names + ')')
- ro.r('covInGeno <- which(covnames %in% colnames(genotypes))')
- ro.r('covnames <- covnames[covInGeno]')
- ro.r("cat('covnames (purged): ', covnames,'\n')")
- ro.r('covariates <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file
- #print("R/qtl matrix of covariates:", ro.r["covariates"])
- return ro.r["covariates"]
-
-def sanitize_rqtl_phenotype(vals):
- pheno_as_string = "c("
- for i, val in enumerate(vals):
- if val == "x":
- if i < (len(vals) - 1):
- pheno_as_string += "NA,"
- else:
- pheno_as_string += "NA"
- else:
- if i < (len(vals) - 1):
- pheno_as_string += str(val) + ","
- else:
- pheno_as_string += str(val)
- pheno_as_string += ")"
- return pheno_as_string
-
-def process_pair_scan_results(result):
- pair_scan_results = []
-
- result = result[1]
- output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
- #print("R/qtl scantwo output:", output)
-
- for i, line in enumerate(result.iter_row()):
- marker = {}
- marker['name'] = result.rownames[i]
- marker['chr1'] = output[i][0]
- marker['Mb'] = output[i][1]
- marker['chr2'] = int(output[i][2])
- pair_scan_results.append(marker)
-
- #print("pair_scan_results:", pair_scan_results)
-
- return pair_scan_results
-
-def process_rqtl_perm_results(num_perm, results):
- perm_vals = []
- for line in str(results).split("\n")[1:(num_perm+1)]:
- #print("R/qtl permutation line:", line.split())
- perm_vals.append(float(line.split()[1]))
-
- perm_output = perm_vals
- suggestive = np.percentile(np.array(perm_vals), 67)
- significant = np.percentile(np.array(perm_vals), 95)
- print("SIGNIFICANT:", significant)
-
- return perm_output, suggestive, significant
-
-def process_rqtl_results(result): # TODO: how to make this a one liner and not copy the stuff in a loop
- qtl_results = []
-
- output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
- #print("R/qtl scanone output:", output)
-
- for i, line in enumerate(result.iter_row()):
- marker = {}
- marker['name'] = result.rownames[i]
- marker['chr'] = output[i][0]
- marker['Mb'] = output[i][1]
- marker['lod_score'] = output[i][2]
- qtl_results.append(marker)
-
- return qtl_results \ No newline at end of file
+import rpy2.robjects as ro
+import numpy as np
+
+from base.webqtlConfig import TMPDIR
+from utility import webqtlUtil
+from utility.tools import locate, TEMPDIR
+
+def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan):
+ geno_to_rqtl_function(dataset)
+
+ ## Get pointers to some common R functions
+ r_library = ro.r["library"] # Map the library function
+ r_c = ro.r["c"] # Map the c function
+ r_sum = ro.r["sum"] # Map the sum function
+ plot = ro.r["plot"] # Map the plot function
+ postscript = ro.r["postscript"] # Map the postscript function
+ png = ro.r["png"] # Map the png function
+ dev_off = ro.r["dev.off"] # Map the device off function
+
+ print(r_library("qtl")) # Load R/qtl
+
+ ## Get pointers to some R/qtl functions
+ scanone = ro.r["scanone"] # Map the scanone function
+ scantwo = ro.r["scantwo"] # Map the scantwo function
+ calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function
+ read_cross = ro.r["read.cross"] # Map the read.cross function
+ write_cross = ro.r["write.cross"] # Map the write.cross function
+ GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function
+
+ crossname = dataset.group.name
+ genofilelocation = locate(crossname + ".geno", "genotype")
+ crossfilelocation = TMPDIR + crossname + ".cross"
+
+ #print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation)
+
+ cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available
+
+ if manhattan_plot:
+ cross_object = calc_genoprob(cross_object)
+ else:
+ cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")
+
+ cross_object = add_phenotype(cross_object, sanitize_rqtl_phenotype(vals)) # Add the phenotype
+
+ # for debug: write_cross(cross_object, "csvr", "test.csvr")
+
+ # Scan for QTLs
+ covar = create_covariates(control_marker, cross_object) # Create the additive covariate matrix
+
+ if pair_scan:
+ if do_control == "true": # If sum(covar) > 0 we have a covariate matrix
+ print("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method, n_cluster = 16)
+ else:
+ print("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16)
+
+ #print("Pair scan results:", result_data_frame)
+
+ pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
+ png(file=TEMPDIR+pair_scan_filename)
+ plot(result_data_frame)
+ dev_off()
+
+ return process_pair_scan_results(result_data_frame)
+ else:
+ if do_control == "true":
+ print("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method)
+ else:
+ print("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method)
+
+ if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user)
+ if do_control == "true":
+ perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm = num_perm, model=model, method=method)
+ else:
+ perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method)
+
+ perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface
+ return perm_output, suggestive, significant, process_rqtl_results(result_data_frame)
+ else:
+ return process_rqtl_results(result_data_frame)
+
+def geno_to_rqtl_function(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
+
+ ro.r("""
+ trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
+
+ getGenoCode <- function(header, name = 'unk'){
+ mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
+ return(trim(strsplit(header[mat],':')[[1]][2]))
+ }
+
+ GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
+ header = readLines(genotypes, 40) # Assume a geno header is not longer than 40 lines
+ toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers
+
+ genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes
+ type <- getGenoCode(header, 'type')
+ genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
+ cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
+ if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one
+ if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males
+ outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype
+ c('sex', '', '', sex), # Sex phenotype for the mice
+ cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes
+ write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file
+ require(qtl)
+ cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross
+ if(type == 'riset') cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl
+ return(cross)
+ }
+ """ % (dataset.group.name + ".geno"))
+
+def add_phenotype(cross, pheno_as_string):
+ ro.globalenv["the_cross"] = cross
+ ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')')
+ return ro.r["the_cross"]
+
+def create_covariates(control_marker, cross):
+ ro.globalenv["the_cross"] = cross
+ ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix
+ userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user
+ covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS)
+ #print("Marker names of selected covariates:", covariate_names)
+ ro.r('covnames <- c(' + covariate_names + ')')
+ ro.r('covInGeno <- which(covnames %in% colnames(genotypes))')
+ ro.r('covnames <- covnames[covInGeno]')
+ ro.r("cat('covnames (purged): ', covnames,'\n')")
+ ro.r('covariates <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file
+ #print("R/qtl matrix of covariates:", ro.r["covariates"])
+ return ro.r["covariates"]
+
+def sanitize_rqtl_phenotype(vals):
+ pheno_as_string = "c("
+ for i, val in enumerate(vals):
+ if val == "x":
+ if i < (len(vals) - 1):
+ pheno_as_string += "NA,"
+ else:
+ pheno_as_string += "NA"
+ else:
+ if i < (len(vals) - 1):
+ pheno_as_string += str(val) + ","
+ else:
+ pheno_as_string += str(val)
+ pheno_as_string += ")"
+ return pheno_as_string
+
+def process_pair_scan_results(result):
+ pair_scan_results = []
+
+ result = result[1]
+ output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+ #print("R/qtl scantwo output:", output)
+
+ for i, line in enumerate(result.iter_row()):
+ marker = {}
+ marker['name'] = result.rownames[i]
+ marker['chr1'] = output[i][0]
+ marker['Mb'] = output[i][1]
+ marker['chr2'] = int(output[i][2])
+ pair_scan_results.append(marker)
+
+ return pair_scan_results
+
+def process_rqtl_perm_results(num_perm, results):
+ perm_vals = []
+ for line in str(results).split("\n")[1:(num_perm+1)]:
+ #print("R/qtl permutation line:", line.split())
+ perm_vals.append(float(line.split()[1]))
+
+ perm_output = perm_vals
+ suggestive = np.percentile(np.array(perm_vals), 67)
+ significant = np.percentile(np.array(perm_vals), 95)
+
+ return perm_output, suggestive, significant
+
+def process_rqtl_results(result): # TODO: how to make this a one liner and not copy the stuff in a loop
+ qtl_results = []
+
+ output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+ #print("R/qtl scanone output:", output)
+
+ for i, line in enumerate(result.iter_row()):
+ marker = {}
+ marker['name'] = result.rownames[i]
+ marker['chr'] = output[i][0]
+ marker['Mb'] = output[i][1]
+ marker['lod_score'] = output[i][2]
+ qtl_results.append(marker)
+
+ return qtl_results
+ \ No newline at end of file