gn2/wqflask/ctl/ctl_analysis.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214

# CTL analysis for GN2
# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com>
import sys
from numpy import *
import rpy2.robjects as ro                    # R Objects
import rpy2.rinterface as ri

import simplejson as json

from gn2.base.webqtlConfig import GENERATED_IMAGE_DIR
from gn2.utility import webqtlUtil                # Random number for the image
from gn2.utility import genofile_parser           # genofile_parser

import base64
import array
import csv
import itertools

from gn2.base import data_set
from gn2.base.trait import create_trait, retrieve_sample_data

from gn2.utility import helper_functions
from gn2.utility.tools import locate, GN2_BRANCH_URL

from rpy2.robjects.packages import importr


# Get pointers to some common R functions
r_library = ro.r["library"]             # Map the library function
r_options = ro.r["options"]             # Map the options function
r_t = ro.r["t"]                   # Map the t function
r_unlist = ro.r["unlist"]              # Map the unlist function
r_list = ro.r.list                   # Map the list function
r_png = ro.r["png"]                 # Map the png function for plotting
r_dev_off = ro.r["dev.off"]             # Map the dev.off function
r_write_table = ro.r["write.table"]         # Map the write.table function
r_data_frame = ro.r["data.frame"]         # Map the write.table function
r_as_numeric = ro.r["as.numeric"]         # Map the write.table function


class CTL:
    def __init__(self):
        # Load CTL - Should only be done once, since it is quite expensive
        r_library("ctl")
        r_options(stringsAsFactors=False)
        # Map the CTLscan function
        self.r_CTLscan = ro.r["CTLscan"]
        # Map the CTLsignificant function
        self.r_CTLsignificant = ro.r["CTLsignificant"]
        # Map the ctl.lineplot function
        self.r_lineplot = ro.r["ctl.lineplot"]
        # Map the CTLsignificant function
        self.r_plotCTLobject = ro.r["plot.CTLobject"]
        self.nodes_list = []
        self.edges_list = []

        self.gn2_url = GN2_BRANCH_URL

    def addNode(self, gt):
        node_dict = {'data': {'id': str(gt.name) + ":" + str(gt.dataset.name),
                              'sid': str(gt.name),
                              'dataset': str(gt.dataset.name),
                              'label': gt.name,
                              'symbol': gt.symbol,
                              'geneid': gt.geneid,
                              'omim': gt.omim}}
        self.nodes_list.append(node_dict)

    def addEdge(self, gtS, gtT, significant, x):
        edge_data = {'id': str(gtS.symbol) + '_' + significant[1][x] + '_' + str(gtT.symbol),
                     'source': str(gtS.name) + ":" + str(gtS.dataset.name),
                     'target': str(gtT.name) + ":" + str(gtT.dataset.name),
                     'lod': significant[3][x],
                     'color': "#ff0000",
                     'width': significant[3][x]}
        edge_dict = {'data': edge_data}
        self.edges_list.append(edge_dict)

    def run_analysis(self, requestform):
        self.trait_db_list = [trait.strip()
                              for trait in requestform['trait_list'].split(',')]
        self.trait_db_list = [x for x in self.trait_db_list if x]
        strategy = requestform.get("strategy")
        nperm = int(requestform.get("nperm"))
        parametric = bool(requestform.get("parametric"))
        significance = float(requestform.get("significance"))

        # Get the name of the .geno file belonging to the first phenotype
        datasetname = self.trait_db_list[0].split(":")[1]
        dataset = data_set.create_dataset(datasetname)

        genofilelocation = locate(dataset.group.name + ".geno", "genotype")
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()
        # Create a genotype matrix
        individuals = parser.individuals
        markers = []
        markernames = []
        for marker in parser.markers:
            markernames.append(marker["name"])
            markers.append(marker["genotypes"])

        genotypes = list(itertools.chain(*markers))

        rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(
            individuals), dimnames=r_list(markernames, individuals), byrow=True))

        # Create a phenotype matrix
        traits = []
        for trait in self.trait_db_list:
            if trait != "":
                ts = trait.split(':')
                gt = create_trait(name=ts[0], dataset_name=ts[1])
                gt = retrieve_sample_data(gt, dataset, individuals)
                for ind in individuals:
                    if ind in list(gt.data.keys()):
                        traits.append(gt.data[ind].value)
                    else:
                        traits.append("-999")

        rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(
            individuals), dimnames=r_list(self.trait_db_list, individuals), byrow=True))

        # Use a data frame to store the objects
        rPheno = r_data_frame(rPheno, check_names=False)
        rGeno = r_data_frame(rGeno, check_names=False)

        # Debug: Print the genotype and phenotype files to disk
        #r_write_table(rGeno, "~/outputGN/geno.csv")
        #r_write_table(rPheno, "~/outputGN/pheno.csv")

        # Perform the CTL scan
        res = self.r_CTLscan(rGeno, rPheno, strategy=strategy,
                             nperm=nperm, parametric=parametric, nthreads=6)

        # Get significant interactions
        significant = self.r_CTLsignificant(res, significance=significance)

        # Create an image for output
        self.results = {}
        self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
        self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']

        self.results['ctlresult'] = significant
        # Store the user specified parameters for the output page
        self.results['requestform'] = requestform

        # Create the lineplot
        r_png(self.results['imgloc1'], width=1000,
              height=600, type='cairo-png')
        self.r_lineplot(res, significance=significance)
        r_dev_off()

        # We start from 2, since R starts from 1 :)
        n = 2
        for trait in self.trait_db_list:
            # Create the QTL like CTL plots
            self.results['imgurl' + \
                         str(n)] = webqtlUtil.genRandStr("CTL_") + ".png"
            self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + \
                self.results['imgurl' + str(n)]
            r_png(self.results['imgloc' + str(n)],
                  width=1000, height=600, type='cairo-png')
            self.r_plotCTLobject(
                res, (n - 1), significance=significance, main='Phenotype ' + trait)
            r_dev_off()
            n = n + 1

        # Flush any output from R
        sys.stdout.flush()

        # Create the interactive graph for cytoscape visualization (Nodes and Edges)
        if not isinstance(significant, ri.RNULLType):
            for x in range(len(significant[0])):
                # Source
                tsS = significant[0][x].split(':')
                # Target
                tsT = significant[2][x].split(':')
                # Retrieve Source info from the DB
                gtS = create_trait(name=tsS[0], dataset_name=tsS[1])
                # Retrieve Target info from the DB
                gtT = create_trait(name=tsT[0], dataset_name=tsT[1])
                self.addNode(gtS)
                self.addNode(gtT)
                self.addEdge(gtS, gtT, significant, x)

                # Update the trait name for the displayed table
                significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name)
                # Update the trait name for the displayed table
                significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name)

        self.elements = json.dumps(self.nodes_list + self.edges_list)

    def loadImage(self, path, name):
        imgfile = open(self.results[path], 'rb')
        imgdata = imgfile.read()
        imgB64 = base64.b64encode(imgdata)
        bytesarray = array.array('B', imgB64)
        self.results[name] = bytesarray

    def render_image(self, results):
        self.loadImage("imgloc1", "imgdata1")
        n = 2
        for trait in self.trait_db_list:
            self.loadImage("imgloc" + str(n), "imgdata" + str(n))
            n = n + 1

    def process_results(self, results):
        template_vars = {}
        template_vars["results"] = self.results
        template_vars["elements"] = self.elements
        self.render_image(self.results)
        sys.stdout.flush()
        return(dict(template_vars))