aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--wqflask/wqflask/marker_regression/gemma_mapping.py134
1 files changed, 69 insertions, 65 deletions
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
index 7a3d0312..83ebcdf9 100644
--- a/wqflask/wqflask/marker_regression/gemma_mapping.py
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -20,6 +20,11 @@ if WEBSERVER_MODE == 'PROD':
GEMMAOPTS = "-no-check"
+def generate_random_n_string(n):
+ return ''.join(random.choice(string.ascii_uppercase + string.digits)
+ for _ in range(n))
+
+
def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco,
maf=0.01, first_run=True, output_files=None):
"""Generates p-values for each marker using GEMMA"""
@@ -29,89 +34,87 @@ def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco,
genofile_name = this_dataset.group.name
if first_run:
- trait_filename = str(this_dataset.group.name) + "_" + str(this_trait.name) + "_" + \
- ''.join(random.choice(string.ascii_uppercase + string.digits)
- for _ in range(6))
+ trait_filename = (f"{str(this_dataset.group.name)}_"
+ f"{str(this_trait.name)}_"
+ f"{generate_random_n_string(6)}")
gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename)
- if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)):
- open("{}{}_output.assoc.txt".format(
- webqtlConfig.GENERATED_IMAGE_DIR, genofile_name), "w+")
+ if not os.path.isfile(f"{webqtlConfig.GENERATED_IMAGE_DIR}"
+ f"{genofile_name}_output.assoc.txt"):
+ open((f"{webqtlConfig.GENERATED_IMAGE_DIR}"
+ f"{genofile_name}_output.assoc.txt"),
+ "w+")
- k_output_filename = this_dataset.group.name + "_K_" + \
- ''.join(random.choice(string.ascii_uppercase + string.digits)
- for _ in range(6))
- gwa_output_filename = this_dataset.group.name + "_GWA_" + \
- ''.join(random.choice(string.ascii_uppercase + string.digits)
- for _ in range(6))
+ k_output_filename = (f"{this_dataset.group.name}_K_"
+ f"{generate_random_n_string(6)}")
+ gwa_output_filename = (f"{this_dataset.group.name}_GWA_"
+ f"{generate_random_n_string(6)}")
this_chromosomes = this_dataset.species.chromosomes.chromosomes
- this_chromosomes_name = [
- this_chromosomes[chromosome].name for chromosome in this_chromosomes]
+ this_chromosomes_name = [this_chromosomes[chromosome].name
+ for chromosome in this_chromosomes]
chr_list_string = ",".join(this_chromosomes_name)
if covariates != "":
gen_covariates_file(this_dataset, covariates, samples)
if use_loco == "True":
- generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- trait_filename,
- flat_files(
- 'genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- k_output_filename)
-
+ generate_k_command = (f"{GEMMA_WRAPPER_COMMAND} --json --loco "
+ f"{chr_list_string} -- {GEMMAOPTS} "
+ f"-g {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_geno.txt -p "
+ f"{TEMPDIR}/gn2/{trait_filename}.txt -a "
+ f"{flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_snps.txt -gk > "
+ f"{TEMPDIR}/gn2/{k_output_filename}.json")
os.system(generate_k_command)
- gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- trait_filename)
+ gemma_command = (f"{GEMMA_WRAPPER_COMMAND} --json --loco "
+ f"--input {TEMPDIR}/gn2/{k_output_filename}.json "
+ f"-- {GEMMAOPTS} "
+ f"-g {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_geno.txt "
+ f"-p {TEMPDIR}/gn2/{trait_filename}.txt ")
if covariates != "":
- gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('mapping'),
- this_dataset.group.name,
- flat_files(
- 'genotype/bimbam'),
- genofile_name,
- maf,
- TEMPDIR,
- gwa_output_filename)
+ gemma_command += (f"-c {flat_files('mapping')}/"
+ f"{this_dataset.group.name}_covariates.txt "
+ f"-a {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_snps.txt "
+ f"-lmm 9 -maf {maf} > {TEMPDIR}/gn2/"
+ f"{gwa_output_filename}.json")
else:
- gemma_command += ' -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
- genofile_name,
- maf,
- TEMPDIR,
- gwa_output_filename)
+ gemma_command += (f"-a {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_snps.txt -lmm 9 -maf "
+ f"{maf} > "
+ f"{TEMPDIR}/gn2/{gwa_output_filename}.json")
else:
- generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- trait_filename,
- flat_files(
- 'genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- k_output_filename)
+ generate_k_command = (f"{GEMMA_WRAPPER_COMMAND} --json -- "
+ f"{GEMMAOPTS} "
+ f" -g {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_geno.txt -p "
+ f"{TEMPDIR}/gn2/{trait_filename}.txt -a "
+ f"{flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_snps.txt -gk > "
+ f"{TEMPDIR}/gn2/{k_output_filename}.json")
os.system(generate_k_command)
- gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -a %s/%s_snps.txt -lmm 9 -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'),
- genofile_name,
- flat_files(
- 'genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- trait_filename)
+ gemma_command = (f"{GEMMA_WRAPPER_COMMAND} --json --input "
+ f"{TEMPDIR}/gn2/{k_output_filename}.json -- "
+ f"{GEMMAOPTS} "
+ f"-a {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_snps.txt "
+ f"-lmm 9 -g {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_geno.txt -p "
+ f"{TEMPDIR}/gn2/{trait_filename}.txt ")
if covariates != "":
- gemma_command += ' -c %s/%s_covariates.txt > %s/gn2/%s.json' % (
- flat_files('mapping'), this_dataset.group.name, TEMPDIR, gwa_output_filename)
+ gemma_command += (f" -c {flat_files('mapping')}/"
+ f"{this_dataset.group.name}"
+ f"_covariates.txt > "
+ f"{TEMPDIR}/gn2/{gwa_output_filename}.json")
else:
- gemma_command += ' > %s/gn2/%s.json' % (
- TEMPDIR, gwa_output_filename)
+ gemma_command += f" > {TEMPDIR}/gn2/{gwa_output_filename}.json"
os.system(gemma_command)
else:
@@ -129,8 +132,7 @@ def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco,
def gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename):
"""Generates phenotype file for GEMMA"""
- current_file_data = []
- with open("{}/gn2/{}.txt".format(TEMPDIR, trait_filename), "w") as outfile:
+ with open(f"{TEMPDIR}/gn2/{trait_filename}.txt", "w") as outfile:
for value in vals:
if value == "x":
outfile.write("NA\n")
@@ -160,7 +162,9 @@ def gen_covariates_file(this_dataset, covariates, samples):
this_covariate_data.append("-9")
covariate_data_object.append(this_covariate_data)
- with open("{}/{}_covariates.txt".format(flat_files('mapping'), this_dataset.group.name), "w") as outfile:
+ with open((f"{flat_files('mapping')}/"
+ f"{this_dataset.group.name}_covariates.txt"),
+ "w") as outfile:
for i in range(len(covariate_data_object[0])):
for this_covariate in covariate_data_object:
outfile.write(str(this_covariate[i]) + "\t")
@@ -170,7 +174,7 @@ def gen_covariates_file(this_dataset, covariates, samples):
def parse_loco_output(this_dataset, gwa_output_filename, loco="True"):
output_filelist = []
- with open("{}/gn2/".format(TEMPDIR) + gwa_output_filename + ".json") as data_file:
+ with open(f"{TEMPDIR}/gn2/{gwa_output_filename}.json") as data_file:
data = json.load(data_file)
files = data['files']