aboutsummaryrefslogtreecommitdiff
import os
import math
import string
import random
import json
import re

from gn2.base import webqtlConfig
from gn2.base.trait import GeneralTrait
from gn2.base.data_set import create_dataset
from gn2.utility.tools import flat_files, REAPER_COMMAND, TEMPDIR


def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boot_check, num_bootstrap, do_control, control_marker, manhattan_plot, first_run=True, output_files=None):
    """Generates p-values for each marker using qtlreaper"""

    if first_run:
        if this_dataset.group.genofile != None:
            genofile_name = this_dataset.group.genofile[:-5]
        else:
            genofile_name = this_dataset.group.name

        trait_filename = f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno"
        gen_pheno_txt_file(samples, vals, trait_filename)

        output_filename = (f"{this_dataset.group.name}_GWA_"
                           + ''.join(random.choice(string.ascii_uppercase + string.digits)
                                     for _ in range(6))
                           )
        bootstrap_filename = None
        permu_filename = None

        opt_list = []
        if boot_check and num_bootstrap > 0:
            bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_"
                                  + ''.join(random.choice(string.ascii_uppercase + string.digits)
                                            for _ in range(6))
                                  )

            opt_list.append("-b")
            opt_list.append(f"--n_bootstrap {str(num_bootstrap)}")
            opt_list.append(
                f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt")
        if num_perm > 0:
            permu_filename = ("{this_dataset.group.name}_PERM_"
                              + ''.join(random.choice(string.ascii_uppercase
                                                      + string.digits) for _ in range(6))
                              )
            opt_list.append("-n " + str(num_perm))
            opt_list.append(
                "--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt")
        if control_marker != "" and do_control == "true":
            opt_list.append("-c " + control_marker)
        if manhattan_plot != True:
            opt_list.append("--interval 1")

        reaper_command = (REAPER_COMMAND +
                          ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(flat_files('genotype'),

                                                                                                   genofile_name,
                                                                                                   TEMPDIR,
                                                                                                   trait_filename,
                                                                                                   " ".join(
                              opt_list),
                              webqtlConfig.GENERATED_IMAGE_DIR,
                              output_filename))
        os.system(reaper_command)
    else:
        output_filename, permu_filename, bootstrap_filename = output_files

    marker_obs, permu_vals, bootstrap_vals = parse_reaper_output(
        output_filename, permu_filename, bootstrap_filename)

    suggestive = 0
    significant = 0
    if len(permu_vals) > 0:
        suggestive = permu_vals[int(num_perm * 0.37 - 1)]
        significant = permu_vals[int(num_perm * 0.95 - 1)]

    return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals,
            [output_filename, permu_filename, bootstrap_filename])


def gen_pheno_txt_file(samples, vals, trait_filename):
    """Generates phenotype file for GEMMA"""

    with open(f"{TEMPDIR}/gn2/{trait_filename}.txt", "w") as outfile:
        outfile.write("Trait\t")

        filtered_sample_list = []
        filtered_vals_list = []
        for i, sample in enumerate(samples):
            if vals[i] != "x":
                filtered_sample_list.append(sample)
                filtered_vals_list.append(vals[i])

        samples_string = "\t".join(filtered_sample_list)
        outfile.write(samples_string + "\n")
        outfile.write("T1\t")
        values_string = "\t".join(filtered_vals_list)
        outfile.write(values_string)


def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename):
    included_markers = []
    p_values = []
    marker_obs = []

    only_cm = False
    only_mb = False

    with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{gwa_filename}.txt") as output_file:
        for line in output_file:
            if line.startswith("ID\t"):
                if len(line.split("\t")) < 8:
                    if 'cM' in line.split("\t"):
                        only_cm = True
                    else:
                        only_mb = True
                continue
            else:
                marker = {}
                marker['name'] = line.split("\t")[1]
                try:
                    marker['chr'] = int(line.split("\t")[2])
                except:
                    marker['chr'] = line.split("\t")[2]
                if only_cm or only_mb:
                    if only_cm:
                        marker['cM'] = float(line.split("\t")[3])
                    else:
                        if float(line.split("\t")[3]) > 1000:
                            marker['Mb'] = float(line.split("\t")[3]) / 1000000
                        else:
                            marker['Mb'] = float(line.split("\t")[3])
                    if float(line.split("\t")[6]) != 1:
                        marker['p_value'] = float(line.split("\t")[6])
                    marker['lrs_value'] = float(line.split("\t")[4])
                    marker['lod_score'] = marker['lrs_value'] / 4.61
                    marker['additive'] = float(line.split("\t")[5])
                else:
                    marker['cM'] = float(line.split("\t")[3])
                    if float(line.split("\t")[4]) > 1000:
                        marker['Mb'] = float(line.split("\t")[4]) / 1000000
                    else:
                        marker['Mb'] = float(line.split("\t")[4])
                    if float(line.split("\t")[7]) != 1:
                        marker['p_value'] = float(line.split("\t")[7])
                    marker['lrs_value'] = float(line.split("\t")[5])
                    marker['lod_score'] = marker['lrs_value'] / 4.61
                    marker['additive'] = float(line.split("\t")[6])
                marker_obs.append(marker)

    # ZS: Results have to be reordered because the new reaper returns results sorted alphabetically by chr for some reason, resulting in chr 1 being followed by 10, etc
    sorted_indices = natural_sort(marker_obs)

    permu_vals = []
    if permu_filename:
        with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{permu_filename}.txt") as permu_file:
            for line in permu_file:
                permu_vals.append(float(line))

    bootstrap_vals = []
    if bootstrap_filename:
        with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") as bootstrap_file:
            for line in bootstrap_file:
                bootstrap_vals.append(int(line))

    marker_obs = [marker_obs[i] for i in sorted_indices]
    if len(bootstrap_vals) > 0:
        bootstrap_vals = [bootstrap_vals[i] for i in sorted_indices]

    return marker_obs, permu_vals, bootstrap_vals


def natural_sort(marker_list):
    """
    Function to naturally sort numbers + strings, adopted from user Mark Byers here: https://stackoverflow.com/questions/4836710/does-python-have-a-built-in-function-for-string-natural-sort
    Changed to return indices instead of values, though, since the same reordering needs to be applied to bootstrap results
    """

    def convert(text):
        if text.isdigit():
            return int(text)
        else:
            if text != "M":
                return text.lower()
            else:
                return "z"

    alphanum_key = lambda key: [convert(c) for c in re.split(
        '([0-9]+)', str(marker_list[key]['chr']))]
    return sorted(list(range(len(marker_list))), key=alphanum_key)