diff options
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/wqflask/marker_regression/gemma_mapping.py | 134 |
1 files changed, 69 insertions, 65 deletions
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 7a3d0312..83ebcdf9 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -20,6 +20,11 @@ if WEBSERVER_MODE == 'PROD': GEMMAOPTS = "-no-check" +def generate_random_n_string(n): + return ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(n)) + + def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco, maf=0.01, first_run=True, output_files=None): """Generates p-values for each marker using GEMMA""" @@ -29,89 +34,87 @@ def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco, genofile_name = this_dataset.group.name if first_run: - trait_filename = str(this_dataset.group.name) + "_" + str(this_trait.name) + "_" + \ - ''.join(random.choice(string.ascii_uppercase + string.digits) - for _ in range(6)) + trait_filename = (f"{str(this_dataset.group.name)}_" + f"{str(this_trait.name)}_" + f"{generate_random_n_string(6)}") gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename) - if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)): - open("{}{}_output.assoc.txt".format( - webqtlConfig.GENERATED_IMAGE_DIR, genofile_name), "w+") + if not os.path.isfile(f"{webqtlConfig.GENERATED_IMAGE_DIR}" + f"{genofile_name}_output.assoc.txt"): + open((f"{webqtlConfig.GENERATED_IMAGE_DIR}" + f"{genofile_name}_output.assoc.txt"), + "w+") - k_output_filename = this_dataset.group.name + "_K_" + \ - ''.join(random.choice(string.ascii_uppercase + string.digits) - for _ in range(6)) - gwa_output_filename = this_dataset.group.name + "_GWA_" + \ - ''.join(random.choice(string.ascii_uppercase + string.digits) - for _ in range(6)) + k_output_filename = (f"{this_dataset.group.name}_K_" + f"{generate_random_n_string(6)}") + gwa_output_filename = (f"{this_dataset.group.name}_GWA_" + f"{generate_random_n_string(6)}") this_chromosomes = this_dataset.species.chromosomes.chromosomes - this_chromosomes_name = [ - this_chromosomes[chromosome].name for chromosome in this_chromosomes] + this_chromosomes_name = [this_chromosomes[chromosome].name + for chromosome in this_chromosomes] chr_list_string = ",".join(this_chromosomes_name) if covariates != "": gen_covariates_file(this_dataset, covariates, samples) if use_loco == "True": - generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - trait_filename, - flat_files( - 'genotype/bimbam'), - genofile_name, - TEMPDIR, - k_output_filename) - + generate_k_command = (f"{GEMMA_WRAPPER_COMMAND} --json --loco " + f"{chr_list_string} -- {GEMMAOPTS} " + f"-g {flat_files('genotype/bimbam')}/" + f"{genofile_name}_geno.txt -p " + f"{TEMPDIR}/gn2/{trait_filename}.txt -a " + f"{flat_files('genotype/bimbam')}/" + f"{genofile_name}_snps.txt -gk > " + f"{TEMPDIR}/gn2/{k_output_filename}.json") os.system(generate_k_command) - gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - trait_filename) + gemma_command = (f"{GEMMA_WRAPPER_COMMAND} --json --loco " + f"--input {TEMPDIR}/gn2/{k_output_filename}.json " + f"-- {GEMMAOPTS} " + f"-g {flat_files('genotype/bimbam')}/" + f"{genofile_name}_geno.txt " + f"-p {TEMPDIR}/gn2/{trait_filename}.txt ") if covariates != "": - gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('mapping'), - this_dataset.group.name, - flat_files( - 'genotype/bimbam'), - genofile_name, - maf, - TEMPDIR, - gwa_output_filename) + gemma_command += (f"-c {flat_files('mapping')}/" + f"{this_dataset.group.name}_covariates.txt " + f"-a {flat_files('genotype/bimbam')}/" + f"{genofile_name}_snps.txt " + f"-lmm 9 -maf {maf} > {TEMPDIR}/gn2/" + f"{gwa_output_filename}.json") else: - gemma_command += ' -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), - genofile_name, - maf, - TEMPDIR, - gwa_output_filename) + gemma_command += (f"-a {flat_files('genotype/bimbam')}/" + f"{genofile_name}_snps.txt -lmm 9 -maf " + f"{maf} > " + f"{TEMPDIR}/gn2/{gwa_output_filename}.json") else: - generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - trait_filename, - flat_files( - 'genotype/bimbam'), - genofile_name, - TEMPDIR, - k_output_filename) + generate_k_command = (f"{GEMMA_WRAPPER_COMMAND} --json -- " + f"{GEMMAOPTS} " + f" -g {flat_files('genotype/bimbam')}/" + f"{genofile_name}_geno.txt -p " + f"{TEMPDIR}/gn2/{trait_filename}.txt -a " + f"{flat_files('genotype/bimbam')}/" + f"{genofile_name}_snps.txt -gk > " + f"{TEMPDIR}/gn2/{k_output_filename}.json") os.system(generate_k_command) - gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -a %s/%s_snps.txt -lmm 9 -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'), - genofile_name, - flat_files( - 'genotype/bimbam'), - genofile_name, - TEMPDIR, - trait_filename) + gemma_command = (f"{GEMMA_WRAPPER_COMMAND} --json --input " + f"{TEMPDIR}/gn2/{k_output_filename}.json -- " + f"{GEMMAOPTS} " + f"-a {flat_files('genotype/bimbam')}/" + f"{genofile_name}_snps.txt " + f"-lmm 9 -g {flat_files('genotype/bimbam')}/" + f"{genofile_name}_geno.txt -p " + f"{TEMPDIR}/gn2/{trait_filename}.txt ") if covariates != "": - gemma_command += ' -c %s/%s_covariates.txt > %s/gn2/%s.json' % ( - flat_files('mapping'), this_dataset.group.name, TEMPDIR, gwa_output_filename) + gemma_command += (f" -c {flat_files('mapping')}/" + f"{this_dataset.group.name}" + f"_covariates.txt > " + f"{TEMPDIR}/gn2/{gwa_output_filename}.json") else: - gemma_command += ' > %s/gn2/%s.json' % ( - TEMPDIR, gwa_output_filename) + gemma_command += f" > {TEMPDIR}/gn2/{gwa_output_filename}.json" os.system(gemma_command) else: @@ -129,8 +132,7 @@ def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco, def gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename): """Generates phenotype file for GEMMA""" - current_file_data = [] - with open("{}/gn2/{}.txt".format(TEMPDIR, trait_filename), "w") as outfile: + with open(f"{TEMPDIR}/gn2/{trait_filename}.txt", "w") as outfile: for value in vals: if value == "x": outfile.write("NA\n") @@ -160,7 +162,9 @@ def gen_covariates_file(this_dataset, covariates, samples): this_covariate_data.append("-9") covariate_data_object.append(this_covariate_data) - with open("{}/{}_covariates.txt".format(flat_files('mapping'), this_dataset.group.name), "w") as outfile: + with open((f"{flat_files('mapping')}/" + f"{this_dataset.group.name}_covariates.txt"), + "w") as outfile: for i in range(len(covariate_data_object[0])): for this_covariate in covariate_data_object: outfile.write(str(this_covariate[i]) + "\t") @@ -170,7 +174,7 @@ def gen_covariates_file(this_dataset, covariates, samples): def parse_loco_output(this_dataset, gwa_output_filename, loco="True"): output_filelist = [] - with open("{}/gn2/".format(TEMPDIR) + gwa_output_filename + ".json") as data_file: + with open(f"{TEMPDIR}/gn2/{gwa_output_filename}.json") as data_file: data = json.load(data_file) files = data['files'] |