From 90b267ce7d399c3c198c57e0e76e81e95fb41691 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 21 Sep 2017 22:26:22 +0000 Subject: LOCO should work with GEMMA (when BIMBAM available), though I still need to do more testing --- wqflask/wqflask/marker_regression/gemma_mapping.py | 177 ++++++++++++--------- 1 file changed, 100 insertions(+), 77 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 2c99d1fc..96a91452 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -1,4 +1,4 @@ -import os, math +import os, math, string, random, json from base import webqtlConfig from base.trait import GeneralTrait @@ -20,7 +20,7 @@ def run_gemma(this_dataset, samples, vals, covariates, method, use_loco): this_chromosomes = this_dataset.species.chromosomes.chromosomes chr_list_string = "" - for i in range(this_chromosomes): + for i in range(len(this_chromosomes)): if i < (len(this_chromosomes) - 1): chr_list_string += this_chromosomes[i+1].name + "," else: @@ -28,97 +28,77 @@ def run_gemma(this_dataset, samples, vals, covariates, method, use_loco): if covariates != "": gen_covariates_file(this_dataset, covariates) - if method == "gemma": - #gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -c %s/%s_covariates.txt -o %s_output' % (flat_files('mapping'), - gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1 -c %s/%s_covariates.txt -outdir %s -o %s_output' % (flat_files('mapping'), - this_dataset.group.name, - flat_files('mapping'), - this_dataset.group.name, - flat_files('mapping'), - this_dataset.group.name, - webqtlConfig.GENERATED_IMAGE_DIR, - this_dataset.group.name) - # use GEMMA_RUN in the next one, create a unique temp file + + if method == "gemma": + gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (flat_files('mapping'), + this_dataset.group.name, + flat_files('mapping'), + this_dataset.group.name) + if covariates != "": + gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (flat_files('mapping'), + this_dataset.group.name, + webqtlConfig.GENERATED_IMAGE_DIR, + this_dataset.group.name) else: - logger.debug("FLAT FILES:", flat_files('mapping')) - if use_loco: - generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > /home/zas1024/tmp/gn2/K.json' % (flat_files('genotype/bimbam'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - this_dataset.group.name) - logger.debug("k_command:" + generate_k_command) - os.system(generate_k_command) - - run_loco_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input /home/zas1024/tmp/gn2/K.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > /home/zas1024/tmp/gn2/GWA.json' % (flat_files('genotype/bimbam'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - this_dataset.group.name, - flat_files('mapping'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - this_dataset.group.name) - os.system(run_loco_command) - else: - gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1 -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (flat_files('genotype/bimbam'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - this_dataset.group.name, - flat_files('mapping'), - this_dataset.group.name, - webqtlConfig.GENERATED_IMAGE_DIR, - this_dataset.group.name) - else: - if method == "gemma": #gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -o %s_output' % (flat_files('mapping'), - gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1 -outdir %s -o %s_output' % (flat_files('mapping'), + gemma_command += ' -outdir %s -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR, + this_dataset.group.name) + else: + if use_loco == "True": + k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > /home/zas1024/tmp/gn2/%s.json' % (flat_files('genotype/bimbam'), this_dataset.group.name, - flat_files('mapping'), + flat_files('genotype/bimbam'), + this_dataset.group.name, + flat_files('genotype/bimbam'), this_dataset.group.name, - webqtlConfig.GENERATED_IMAGE_DIR, + k_output_filename) + logger.debug("k_command:" + generate_k_command) + os.system(generate_k_command) + + gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input /home/zas1024/tmp/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (k_output_filename, + flat_files('genotype/bimbam'), + this_dataset.group.name, + flat_files('genotype/bimbam'), this_dataset.group.name) - else: - if use_loco: - generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > /home/zas1024/tmp/gn2/K.json' % (flat_files('genotype/bimbam'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - this_dataset.group.name) - logger.debug("k_command:" + generate_k_command) - os.system(generate_k_command) - - run_loco_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input /home/zas1024/tmp/gn2/K.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > /home/zas1024/tmp/gn2/GWA.json' % (flat_files('genotype/bimbam'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - this_dataset.group.name, - flat_files('mapping'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - this_dataset.group.name) - os.system(run_loco_command) + + gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + if covariates != "": + gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > /home/zas1024/tmp/gn2/%s.json' % (flat_files('mapping'), + this_dataset.group.name, + flat_files('genotype/bimbam'), + this_dataset.group.name, + gwa_output_filename) else: - #gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -o %s_output' % (flat_files('genotype/bimbam'), - gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1 -outdir %s -debug -o %s_output' % (flat_files('genotype/bimbam'), + gemma_command += ' -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > /home/zas1024/tmp/gn2/GWA.json' % (flat_files('genotype/bimbam'), + this_dataset.group.name) + + else: + gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (flat_files('genotype/bimbam'), this_dataset.group.name, flat_files('genotype/bimbam'), this_dataset.group.name, flat_files('genotype/bimbam'), this_dataset.group.name, flat_files('genotype/bimbam'), - this_dataset.group.name, - webqtlConfig.GENERATED_IMAGE_DIR, this_dataset.group.name) - logger.debug("gemma_command:" + gemma_command) + if covariates != "": + gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (flat_files('mapping'), + this_dataset.group.name, + webqtlConfig.GENERATED_IMAGE_DIR, + this_dataset.group.name) + else: + gemma_command += ' -outdir %s -debug -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR, + this_dataset.group.name) + + logger.debug("gemma_command:" + gemma_command) os.system(gemma_command) - marker_obs = parse_gemma_output(this_dataset) + if use_loco == "True": + marker_obs = parse_loco_output(this_dataset, gwa_output_filename) + else: + marker_obs = parse_gemma_output(this_dataset) return marker_obs @@ -217,3 +197,46 @@ def parse_gemma_output(this_dataset): p_values.append(float(line.split("\t")[10])) return marker_obs + +def parse_loco_output(this_dataset, gwa_output_filename): + + output_filelist = [] + with open("/home/zas1024/tmp/gn2/" + gwa_output_filename + ".json") as data_file: + data = json.load(data_file) + + files = data['files'] + for file in files: + output_filelist.append(file[2]) + + included_markers = [] + p_values = [] + marker_obs = [] + previous_chr = 0 + + for this_file in output_filelist: + #with open("/home/zas1024/gene/wqflask/output/{}_output.assoc.txt".format(this_dataset.group.name)) as output_file: + with open(this_file) as output_file: + for line in output_file: + if line.startswith("chr"): + continue + else: + marker = {} + marker['name'] = line.split("\t")[1] + if line.split("\t")[0] != "X" and line.split("\t")[0] != "X/Y": + marker['chr'] = int(line.split("\t")[0]) + else: + marker['chr'] = line.split("\t")[0] + marker['Mb'] = float(line.split("\t")[2]) / 1000000 + marker['p_value'] = float(line.split("\t")[10]) + if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): + marker['lod_score'] = 0 + #marker['lrs_value'] = 0 + else: + marker['lod_score'] = -math.log10(marker['p_value']) + #marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + marker_obs.append(marker) + + included_markers.append(line.split("\t")[1]) + p_values.append(float(line.split("\t")[10])) + + return marker_obs \ No newline at end of file -- cgit v1.2.3