aboutsummaryrefslogtreecommitdiff
path: root/gn2/wqflask/heatmap/heatmap.py
blob: c2dd55bded62cecedc92c4abccd8b3ee96e23dd9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import string
import os
import random
from gn2.base import species
from gn2.base import webqtlConfig
from gn2.utility import helper_functions

from gn2.utility.tools import flat_files, REAPER_COMMAND, TEMPDIR
from redis import Redis
from flask import Flask, g

from gn2.wqflask.database import database_connection
from gn2.utility.tools import get_setting

Redis = Redis()


class Heatmap:

    def __init__(self, db_cursor, start_vars, temp_uuid):
        trait_db_list = [trait.strip()
                         for trait in start_vars['trait_list'].split(',')]
        helper_functions.get_trait_db_obs(self, trait_db_list)

        self.temp_uuid = temp_uuid
        self.num_permutations = 5000
        self.dataset = self.trait_list[0][1]

        self.json_data = {}  # The dictionary that will be used to create the json object that contains all the data needed to create the figure

        self.all_sample_list = []
        self.traits = []

        chrnames = []
        self.species = species.TheSpecies(dataset=self.trait_list[0][1])

        with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as db_cursor:
            for this_chr in self.species.chromosomes.chromosomes(db_cursor):
                chrnames.append([self.species.chromosomes.chromosomes(db_cursor)[this_chr].name,
                                self.species.chromosomes.chromosomes(db_cursor)[this_chr].mb_length])

        for trait_db in self.trait_list:

            this_trait = trait_db[0]
            self.traits.append(this_trait.name)
            this_sample_data = this_trait.data

            for sample in this_sample_data:
                if sample not in self.all_sample_list:
                    self.all_sample_list.append(sample)

        self.sample_data = []
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            this_sample_data = this_trait.data

            this_trait_vals = []
            for sample in self.all_sample_list:
                if sample in this_sample_data:
                    this_trait_vals.append(this_sample_data[sample].value)
                else:
                    this_trait_vals.append('')
            self.sample_data.append(this_trait_vals)

        self.gen_reaper_results()

        lodnames = []
        chr_pos = []
        pos = []
        markernames = []

        for trait in list(self.trait_results.keys()):
            lodnames.append(trait)

        self.dataset.group.get_markers()
        for marker in self.dataset.group.markers.markers:
            chr_pos.append(marker['chr'])
            pos.append(marker['Mb'])
            markernames.append(marker['name'])

        self.json_data['chrnames'] = chrnames
        self.json_data['lodnames'] = lodnames
        self.json_data['chr'] = chr_pos
        self.json_data['pos'] = pos
        self.json_data['markernames'] = markernames

        for trait in self.trait_results:
            self.json_data[trait] = self.trait_results[trait]

        self.js_data = dict(
            json_data=self.json_data
        )

    def gen_reaper_results(self):
        self.trait_results = {}
        for trait_db in self.trait_list:
            self.dataset.group.get_markers()
            this_trait = trait_db[0]

            genotype = self.dataset.group.read_genotype_file(use_reaper=False)
            samples, values, variances, sample_aliases = this_trait.export_informative()

            if self.dataset.group.genofile != None:
                genofile_name = self.dataset.group.genofile[:-5]
            else:
                genofile_name = self.dataset.group.name

            trimmed_samples = []
            trimmed_values = []
            for i in range(0, len(samples)):
                if samples[i] in self.dataset.group.samplelist:
                    trimmed_samples.append(str(samples[i]))
                    trimmed_values.append(values[i])

            trait_filename = str(this_trait.name) + "_" + \
                str(self.dataset.name) + "_pheno"
            gen_pheno_txt_file(trimmed_samples, trimmed_values, trait_filename)

            output_filename = self.dataset.group.name + "_GWA_" + \
                ''.join(random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(6))

            reaper_command = REAPER_COMMAND + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt -n 1000 -o {4}{5}.txt'.format(flat_files('genotype'),
                                                                                                                           genofile_name,
                                                                                                                           TEMPDIR,
                                                                                                                           trait_filename,
                                                                                                                           webqtlConfig.GENERATED_IMAGE_DIR,
                                                                                                                           output_filename)

            os.system(reaper_command)

            reaper_results = parse_reaper_output(output_filename)

            lrs_values = [float(qtl['lrs_value']) for qtl in reaper_results]

            self.trait_results[this_trait.name] = []
            for qtl in reaper_results:
                if qtl['additive'] > 0:
                    self.trait_results[this_trait.name].append(
                        -float(qtl['lrs_value']))
                else:
                    self.trait_results[this_trait.name].append(
                        float(qtl['lrs_value']))


def gen_pheno_txt_file(samples, vals, filename):
    """Generates phenotype file for GEMMA"""

    with open("{0}/gn2/{1}.txt".format(TEMPDIR, filename), "w") as outfile:
        outfile.write("Trait\t")

        filtered_sample_list = []
        filtered_vals_list = []
        for i, sample in enumerate(samples):
            if vals[i] != "x":
                filtered_sample_list.append(sample)
                filtered_vals_list.append(str(vals[i]))

        samples_string = "\t".join(filtered_sample_list)
        outfile.write(samples_string + "\n")
        outfile.write("T1\t")
        values_string = "\t".join(filtered_vals_list)
        outfile.write(values_string)


def parse_reaper_output(gwa_filename):
    included_markers = []
    p_values = []
    marker_obs = []

    with open("{}{}.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, gwa_filename)) as output_file:
        for line in output_file:
            if line.startswith("ID\t"):
                continue
            else:
                marker = {}
                marker['name'] = line.split("\t")[1]
                try:
                    marker['chr'] = int(line.split("\t")[2])
                except:
                    marker['chr'] = line.split("\t")[2]
                marker['cM'] = float(line.split("\t")[3])
                marker['Mb'] = float(line.split("\t")[4])
                if float(line.split("\t")[7]) != 1:
                    marker['p_value'] = float(line.split("\t")[7])
                marker['lrs_value'] = float(line.split("\t")[5])
                marker['lod_score'] = marker['lrs_value'] / 4.61
                marker['additive'] = float(line.split("\t")[6])
                marker_obs.append(marker)

    return marker_obs