diff options
author | BonfaceKilz | 2020-12-02 01:38:09 +0300 |
---|---|---|
committer | GitHub | 2020-12-02 01:38:09 +0300 |
commit | 5943d5b81c4376d2063d9b1ba014dae919005868 (patch) | |
tree | fa4c6f2e7f8eab48d75b52f7d2856d471c07dd32 /wqflask/wqflask | |
parent | 6a143b56d22887b54d16393a546b9c3ac16aeb38 (diff) | |
parent | c96c1db5660367f1f86cfa76309c87866e79bf79 (diff) | |
download | genenetwork2-5943d5b81c4376d2063d9b1ba014dae919005868.tar.gz |
Merge pull request #484 from Alexanderlacuna/test-regression
Add tests for marker_regression
Diffstat (limited to 'wqflask/wqflask')
-rw-r--r-- | wqflask/wqflask/marker_regression/gemma_mapping.py | 9 | ||||
-rw-r--r-- | wqflask/wqflask/marker_regression/plink_mapping.py | 22 | ||||
-rw-r--r-- | wqflask/wqflask/marker_regression/qtlreaper_mapping.py | 46 |
3 files changed, 41 insertions, 36 deletions
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 68a8d5ba..02f91a32 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -31,16 +31,11 @@ def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco, maf gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) this_chromosomes = this_dataset.species.chromosomes.chromosomes - chr_list_string = "" - for i in range(len(this_chromosomes)): - if i < (len(this_chromosomes) - 1): - chr_list_string += this_chromosomes[i+1].name + "," - else: - chr_list_string += this_chromosomes[i+1].name + this_chromosomes_name=[chromosome.name for chromosome in this_chromosomes] + chr_list_string=",".join(this_chromosomes_name) if covariates != "": gen_covariates_file(this_dataset, covariates, samples) - if use_loco == "True": generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), genofile_name, diff --git a/wqflask/wqflask/marker_regression/plink_mapping.py b/wqflask/wqflask/marker_regression/plink_mapping.py index fd91b6ca..5d675c38 100644 --- a/wqflask/wqflask/marker_regression/plink_mapping.py +++ b/wqflask/wqflask/marker_regression/plink_mapping.py @@ -9,11 +9,11 @@ import utility.logger logger = utility.logger.getLogger(__name__ ) def run_plink(this_trait, dataset, species, vals, maf): - plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(dataset.group.name, this_trait.name)) + plink_output_filename = webqtlUtil.genRandStr(f"{dataset.group.name}_{this_trait.name}_") gen_pheno_txt_file(dataset, vals) - plink_command = PLINK_COMMAND + ' --noweb --bfile %s/%s --no-pheno --no-fid --no-parents --no-sex --maf %s --out %s%s --assoc ' % ( - flat_files('mapping'), dataset.group.name, maf, TMPDIR, plink_output_filename) + + plink_command = f"{PLINK_COMMAND} --noweb --bfile {flat_files('mapping')}/{dataset.group.name} --no-pheno --no-fid --no-parents --no-sex --maf {maf} --out { TMPDIR}{plink_output_filename} --assoc " logger.debug("plink_command:", plink_command) os.system(plink_command) @@ -29,12 +29,12 @@ def gen_pheno_txt_file(this_dataset, vals): """Generates phenotype file for GEMMA/PLINK""" current_file_data = [] - with open("{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name), "r") as outfile: + with open(f"{flat_files('mapping')}/{this_dataset.group.name}.fam", "r") as outfile: for i, line in enumerate(outfile): split_line = line.split() current_file_data.append(split_line) - with open("{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name), "w") as outfile: + with open(f"{flat_files('mapping')}/{this_dataset.group.name}.fam","w") as outfile: for i, line in enumerate(current_file_data): if vals[i] == "x": this_val = -9 @@ -44,8 +44,8 @@ def gen_pheno_txt_file(this_dataset, vals): def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): ped_sample_list = get_samples_from_ped_file(dataset) - output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb") - header = 'FID\tIID\t%s\n' % this_trait.name + output_file = open(f"{TMPDIR}{pheno_filename}.txt", "wb") + header = f"FID\tIID\t{this_trait.name}\n" output_file.write(header) new_value_list = [] @@ -65,7 +65,7 @@ def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): for i, sample in enumerate(ped_sample_list): j = i+1 value = new_value_list[i] - new_line += '%s\t%s\t%s\n'%(sample, sample, value) + new_line += f"{sample}\t{sample}\t{value}\n" if j%1000 == 0: output_file.write(newLine) @@ -78,7 +78,7 @@ def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): # get strain name from ped file in order def get_samples_from_ped_file(dataset): - ped_file= open("{}{}.ped".format(flat_files('mapping'), dataset.group.name), "r") + ped_file= open(f"{flat_files('mapping')}{dataset.group.name}.ped","r") line = ped_file.readline() sample_list=[] @@ -98,7 +98,7 @@ def parse_plink_output(output_filename, species): threshold_p_value = 1 - result_fp = open("%s%s.qassoc"% (TMPDIR, output_filename), "rb") + result_fp = open(f"{TMPDIR}{output_filename}.qassoc","rb") line = result_fp.readline() @@ -154,7 +154,7 @@ def parse_plink_output(output_filename, species): # function: convert line from str to list; # output: lineList list ####################################################### -def build_line_list(line=None): +def build_line_list(line=""): line_list = line.strip().split(' ')# irregular number of whitespaces between columns line_list = [item for item in line_list if item !=''] line_list = [item.strip() for item in line_list] diff --git a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py index 78b1f7b0..505ae295 100644 --- a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py +++ b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py @@ -17,22 +17,29 @@ def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boo else: genofile_name = this_dataset.group.name - trait_filename = str(this_trait.name) + "_" + str(this_dataset.name) + "_pheno" + trait_filename =f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno" gen_pheno_txt_file(samples, vals, trait_filename) - output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + output_filename = (f"{this_dataset.group.name}_GWA_"+ + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + ) bootstrap_filename = None permu_filename = None opt_list = [] if boot_check and num_bootstrap > 0: - bootstrap_filename = this_dataset.group.name + "_BOOTSTRAP_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_" + + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + ) opt_list.append("-b") - opt_list.append("--n_bootstrap " + str(num_bootstrap)) - opt_list.append("--bootstrap_output " + webqtlConfig.GENERATED_IMAGE_DIR + bootstrap_filename + ".txt") + opt_list.append(f"--n_bootstrap{str(num_bootstrap)}") + opt_list.append(f"--bootstrap_output{webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") if num_perm > 0: - permu_filename = this_dataset.group.name + "_PERM_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + permu_filename =("{this_dataset.group.name}_PERM_" + + ''.join(random.choice(string.ascii_uppercase + + string.digits) for _ in range(6)) + ) opt_list.append("-n " + str(num_perm)) opt_list.append("--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt") if control_marker != "" and do_control == "true": @@ -40,13 +47,15 @@ def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boo if manhattan_plot != True: opt_list.append("--interval 1") - reaper_command = REAPER_COMMAND + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(flat_files('genotype'), - genofile_name, - TEMPDIR, - trait_filename, - " ".join(opt_list), - webqtlConfig.GENERATED_IMAGE_DIR, - output_filename) + reaper_command = (REAPER_COMMAND + + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(flat_files('genotype'), + + genofile_name, + TEMPDIR, + trait_filename, + " ".join(opt_list), + webqtlConfig.GENERATED_IMAGE_DIR, + output_filename)) logger.debug("reaper_command:" + reaper_command) os.system(reaper_command) @@ -61,12 +70,13 @@ def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boo suggestive = permu_vals[int(num_perm*0.37-1)] significant = permu_vals[int(num_perm*0.95-1)] - return marker_obs, permu_vals, suggestive, significant, bootstrap_vals, [output_filename, permu_filename, bootstrap_filename] + return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals, + [output_filename, permu_filename, bootstrap_filename]) def gen_pheno_txt_file(samples, vals, trait_filename): """Generates phenotype file for GEMMA""" - with open("{}/gn2/{}.txt".format(TEMPDIR, trait_filename), "w") as outfile: + with open(f"{TEMPDIR}/gn2/{trait_filename}.txt","w") as outfile: outfile.write("Trait\t") filtered_sample_list = [] @@ -90,7 +100,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): only_cm = False only_mb = False - with open("{}{}.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, gwa_filename)) as output_file: + with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{gwa_filename}.txt") as output_file: for line in output_file: if line.startswith("ID\t"): if len(line.split("\t")) < 8: @@ -137,13 +147,13 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): permu_vals = [] if permu_filename: - with open("{}{}.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, permu_filename)) as permu_file: + with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{permu_filename}.txt") as permu_file: for line in permu_file: permu_vals.append(float(line)) bootstrap_vals = [] if bootstrap_filename: - with open("{}{}.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, bootstrap_filename)) as bootstrap_file: + with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") as bootstrap_file: for line in bootstrap_file: bootstrap_vals.append(int(line)) |