# CTL analysis for GN2 # Author / Maintainer: Danny Arends import sys from numpy import * import rpy2.robjects as ro # R Objects import rpy2.rinterface as ri import simplejson as json from gn2.base.webqtlConfig import GENERATED_IMAGE_DIR from gn2.utility import webqtlUtil # Random number for the image from gn2.utility import genofile_parser # genofile_parser import base64 import array import csv import itertools from gn2.base import data_set from gn2.base.trait import create_trait, retrieve_sample_data from gn2.utility import helper_functions from gn2.utility.tools import locate, GN2_BRANCH_URL from rpy2.robjects.packages import importr # Get pointers to some common R functions r_library = ro.r["library"] # Map the library function r_options = ro.r["options"] # Map the options function r_t = ro.r["t"] # Map the t function r_unlist = ro.r["unlist"] # Map the unlist function r_list = ro.r.list # Map the list function r_png = ro.r["png"] # Map the png function for plotting r_dev_off = ro.r["dev.off"] # Map the dev.off function r_write_table = ro.r["write.table"] # Map the write.table function r_data_frame = ro.r["data.frame"] # Map the write.table function r_as_numeric = ro.r["as.numeric"] # Map the write.table function class CTL: def __init__(self): # Load CTL - Should only be done once, since it is quite expensive r_library("ctl") r_options(stringsAsFactors=False) # Map the CTLscan function self.r_CTLscan = ro.r["CTLscan"] # Map the CTLsignificant function self.r_CTLsignificant = ro.r["CTLsignificant"] # Map the ctl.lineplot function self.r_lineplot = ro.r["ctl.lineplot"] # Map the CTLsignificant function self.r_plotCTLobject = ro.r["plot.CTLobject"] self.nodes_list = [] self.edges_list = [] self.gn2_url = GN2_BRANCH_URL def addNode(self, gt): node_dict = {'data': {'id': str(gt.name) + ":" + str(gt.dataset.name), 'sid': str(gt.name), 'dataset': str(gt.dataset.name), 'label': gt.name, 'symbol': gt.symbol, 'geneid': gt.geneid, 'omim': gt.omim}} self.nodes_list.append(node_dict) def addEdge(self, gtS, gtT, significant, x): edge_data = {'id': str(gtS.symbol) + '_' + significant[1][x] + '_' + str(gtT.symbol), 'source': str(gtS.name) + ":" + str(gtS.dataset.name), 'target': str(gtT.name) + ":" + str(gtT.dataset.name), 'lod': significant[3][x], 'color': "#ff0000", 'width': significant[3][x]} edge_dict = {'data': edge_data} self.edges_list.append(edge_dict) def run_analysis(self, requestform): self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')] self.trait_db_list = [x for x in self.trait_db_list if x] strategy = requestform.get("strategy") nperm = int(requestform.get("nperm")) parametric = bool(requestform.get("parametric")) significance = float(requestform.get("significance")) # Get the name of the .geno file belonging to the first phenotype datasetname = self.trait_db_list[0].split(":")[1] dataset = data_set.create_dataset(datasetname) genofilelocation = locate(dataset.group.name + ".geno", "genotype") parser = genofile_parser.ConvertGenoFile(genofilelocation) parser.process_csv() # Create a genotype matrix individuals = parser.individuals markers = [] markernames = [] for marker in parser.markers: markernames.append(marker["name"]) markers.append(marker["genotypes"]) genotypes = list(itertools.chain(*markers)) rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len( individuals), dimnames=r_list(markernames, individuals), byrow=True)) # Create a phenotype matrix traits = [] for trait in self.trait_db_list: if trait != "": ts = trait.split(':') gt = create_trait(name=ts[0], dataset_name=ts[1]) gt = retrieve_sample_data(gt, dataset, individuals) for ind in individuals: if ind in list(gt.data.keys()): traits.append(gt.data[ind].value) else: traits.append("-999") rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len( individuals), dimnames=r_list(self.trait_db_list, individuals), byrow=True)) # Use a data frame to store the objects rPheno = r_data_frame(rPheno, check_names=False) rGeno = r_data_frame(rGeno, check_names=False) # Debug: Print the genotype and phenotype files to disk #r_write_table(rGeno, "~/outputGN/geno.csv") #r_write_table(rPheno, "~/outputGN/pheno.csv") # Perform the CTL scan res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, nperm=nperm, parametric=parametric, nthreads=6) # Get significant interactions significant = self.r_CTLsignificant(res, significance=significance) # Create an image for output self.results = {} self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png" self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1'] self.results['ctlresult'] = significant # Store the user specified parameters for the output page self.results['requestform'] = requestform # Create the lineplot r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png') self.r_lineplot(res, significance=significance) r_dev_off() # We start from 2, since R starts from 1 :) n = 2 for trait in self.trait_db_list: # Create the QTL like CTL plots self.results['imgurl' + \ str(n)] = webqtlUtil.genRandStr("CTL_") + ".png" self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + \ self.results['imgurl' + str(n)] r_png(self.results['imgloc' + str(n)], width=1000, height=600, type='cairo-png') self.r_plotCTLobject( res, (n - 1), significance=significance, main='Phenotype ' + trait) r_dev_off() n = n + 1 # Flush any output from R sys.stdout.flush() # Create the interactive graph for cytoscape visualization (Nodes and Edges) if not isinstance(significant, ri.RNULLType): for x in range(len(significant[0])): # Source tsS = significant[0][x].split(':') # Target tsT = significant[2][x].split(':') # Retrieve Source info from the DB gtS = create_trait(name=tsS[0], dataset_name=tsS[1]) # Retrieve Target info from the DB gtT = create_trait(name=tsT[0], dataset_name=tsT[1]) self.addNode(gtS) self.addNode(gtT) self.addEdge(gtS, gtT, significant, x) # Update the trait name for the displayed table significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name) # Update the trait name for the displayed table significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name) self.elements = json.dumps(self.nodes_list + self.edges_list) def loadImage(self, path, name): imgfile = open(self.results[path], 'rb') imgdata = imgfile.read() imgB64 = base64.b64encode(imgdata) bytesarray = array.array('B', imgB64) self.results[name] = bytesarray def render_image(self, results): self.loadImage("imgloc1", "imgdata1") n = 2 for trait in self.trait_db_list: self.loadImage("imgloc" + str(n), "imgdata" + str(n)) n = n + 1 def process_results(self, results): template_vars = {} template_vars["results"] = self.results template_vars["elements"] = self.elements self.render_image(self.results) sys.stdout.flush() return(dict(template_vars))