1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
|
# CTL analysis for GN2
# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com>
import sys
from numpy import *
import rpy2.robjects as ro # R Objects
import rpy2.rinterface as ri
import simplejson as json
from gn2.base.webqtlConfig import GENERATED_IMAGE_DIR
from gn2.utility import webqtlUtil # Random number for the image
from gn2.utility import genofile_parser # genofile_parser
import base64
import array
import csv
import itertools
from gn2.base import data_set
from gn2.base.trait import create_trait, retrieve_sample_data
from gn2.utility import helper_functions
from gn2.utility.tools import locate, GN2_BRANCH_URL
from rpy2.robjects.packages import importr
# Get pointers to some common R functions
r_library = ro.r["library"] # Map the library function
r_options = ro.r["options"] # Map the options function
r_t = ro.r["t"] # Map the t function
r_unlist = ro.r["unlist"] # Map the unlist function
r_list = ro.r.list # Map the list function
r_png = ro.r["png"] # Map the png function for plotting
r_dev_off = ro.r["dev.off"] # Map the dev.off function
r_write_table = ro.r["write.table"] # Map the write.table function
r_data_frame = ro.r["data.frame"] # Map the write.table function
r_as_numeric = ro.r["as.numeric"] # Map the write.table function
class CTL:
def __init__(self):
# Load CTL - Should only be done once, since it is quite expensive
r_library("ctl")
r_options(stringsAsFactors=False)
# Map the CTLscan function
self.r_CTLscan = ro.r["CTLscan"]
# Map the CTLsignificant function
self.r_CTLsignificant = ro.r["CTLsignificant"]
# Map the ctl.lineplot function
self.r_lineplot = ro.r["ctl.lineplot"]
# Map the CTLsignificant function
self.r_plotCTLobject = ro.r["plot.CTLobject"]
self.nodes_list = []
self.edges_list = []
self.gn2_url = GN2_BRANCH_URL
def addNode(self, gt):
node_dict = {'data': {'id': str(gt.name) + ":" + str(gt.dataset.name),
'sid': str(gt.name),
'dataset': str(gt.dataset.name),
'label': gt.name,
'symbol': gt.symbol,
'geneid': gt.geneid,
'omim': gt.omim}}
self.nodes_list.append(node_dict)
def addEdge(self, gtS, gtT, significant, x):
edge_data = {'id': str(gtS.symbol) + '_' + significant[1][x] + '_' + str(gtT.symbol),
'source': str(gtS.name) + ":" + str(gtS.dataset.name),
'target': str(gtT.name) + ":" + str(gtT.dataset.name),
'lod': significant[3][x],
'color': "#ff0000",
'width': significant[3][x]}
edge_dict = {'data': edge_data}
self.edges_list.append(edge_dict)
def run_analysis(self, requestform):
self.trait_db_list = [trait.strip()
for trait in requestform['trait_list'].split(',')]
self.trait_db_list = [x for x in self.trait_db_list if x]
strategy = requestform.get("strategy")
nperm = int(requestform.get("nperm"))
parametric = bool(requestform.get("parametric"))
significance = float(requestform.get("significance"))
# Get the name of the .geno file belonging to the first phenotype
datasetname = self.trait_db_list[0].split(":")[1]
dataset = data_set.create_dataset(datasetname)
genofilelocation = locate(dataset.group.name + ".geno", "genotype")
parser = genofile_parser.ConvertGenoFile(genofilelocation)
parser.process_csv()
# Create a genotype matrix
individuals = parser.individuals
markers = []
markernames = []
for marker in parser.markers:
markernames.append(marker["name"])
markers.append(marker["genotypes"])
genotypes = list(itertools.chain(*markers))
rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(
individuals), dimnames=r_list(markernames, individuals), byrow=True))
# Create a phenotype matrix
traits = []
for trait in self.trait_db_list:
if trait != "":
ts = trait.split(':')
gt = create_trait(name=ts[0], dataset_name=ts[1])
gt = retrieve_sample_data(gt, dataset, individuals)
for ind in individuals:
if ind in list(gt.data.keys()):
traits.append(gt.data[ind].value)
else:
traits.append("-999")
rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(
individuals), dimnames=r_list(self.trait_db_list, individuals), byrow=True))
# Use a data frame to store the objects
rPheno = r_data_frame(rPheno, check_names=False)
rGeno = r_data_frame(rGeno, check_names=False)
# Debug: Print the genotype and phenotype files to disk
#r_write_table(rGeno, "~/outputGN/geno.csv")
#r_write_table(rPheno, "~/outputGN/pheno.csv")
# Perform the CTL scan
res = self.r_CTLscan(rGeno, rPheno, strategy=strategy,
nperm=nperm, parametric=parametric, nthreads=6)
# Get significant interactions
significant = self.r_CTLsignificant(res, significance=significance)
# Create an image for output
self.results = {}
self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']
self.results['ctlresult'] = significant
# Store the user specified parameters for the output page
self.results['requestform'] = requestform
# Create the lineplot
r_png(self.results['imgloc1'], width=1000,
height=600, type='cairo-png')
self.r_lineplot(res, significance=significance)
r_dev_off()
# We start from 2, since R starts from 1 :)
n = 2
for trait in self.trait_db_list:
# Create the QTL like CTL plots
self.results['imgurl' + \
str(n)] = webqtlUtil.genRandStr("CTL_") + ".png"
self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + \
self.results['imgurl' + str(n)]
r_png(self.results['imgloc' + str(n)],
width=1000, height=600, type='cairo-png')
self.r_plotCTLobject(
res, (n - 1), significance=significance, main='Phenotype ' + trait)
r_dev_off()
n = n + 1
# Flush any output from R
sys.stdout.flush()
# Create the interactive graph for cytoscape visualization (Nodes and Edges)
if not isinstance(significant, ri.RNULLType):
for x in range(len(significant[0])):
# Source
tsS = significant[0][x].split(':')
# Target
tsT = significant[2][x].split(':')
# Retrieve Source info from the DB
gtS = create_trait(name=tsS[0], dataset_name=tsS[1])
# Retrieve Target info from the DB
gtT = create_trait(name=tsT[0], dataset_name=tsT[1])
self.addNode(gtS)
self.addNode(gtT)
self.addEdge(gtS, gtT, significant, x)
# Update the trait name for the displayed table
significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name)
# Update the trait name for the displayed table
significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name)
self.elements = json.dumps(self.nodes_list + self.edges_list)
def loadImage(self, path, name):
imgfile = open(self.results[path], 'rb')
imgdata = imgfile.read()
imgB64 = base64.b64encode(imgdata)
bytesarray = array.array('B', imgB64)
self.results[name] = bytesarray
def render_image(self, results):
self.loadImage("imgloc1", "imgdata1")
n = 2
for trait in self.trait_db_list:
self.loadImage("imgloc" + str(n), "imgdata" + str(n))
n = n + 1
def process_results(self, results):
template_vars = {}
template_vars["results"] = self.results
template_vars["elements"] = self.elements
self.render_image(self.results)
sys.stdout.flush()
return(dict(template_vars))
|