diff options
Diffstat (limited to 'gn2/wqflask/ctl')
-rw-r--r-- | gn2/wqflask/ctl/__init__.py | 0 | ||||
-rw-r--r-- | gn2/wqflask/ctl/ctl_analysis.py | 214 | ||||
-rw-r--r-- | gn2/wqflask/ctl/gn3_ctl_analysis.py | 132 |
3 files changed, 346 insertions, 0 deletions
diff --git a/gn2/wqflask/ctl/__init__.py b/gn2/wqflask/ctl/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/gn2/wqflask/ctl/__init__.py diff --git a/gn2/wqflask/ctl/ctl_analysis.py b/gn2/wqflask/ctl/ctl_analysis.py new file mode 100644 index 00000000..513c1b1c --- /dev/null +++ b/gn2/wqflask/ctl/ctl_analysis.py @@ -0,0 +1,214 @@ +# CTL analysis for GN2 +# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com> +import sys +from numpy import * +import rpy2.robjects as ro # R Objects +import rpy2.rinterface as ri + +import simplejson as json + +from gn2.base.webqtlConfig import GENERATED_IMAGE_DIR +from gn2.utility import webqtlUtil # Random number for the image +from gn2.utility import genofile_parser # genofile_parser + +import base64 +import array +import csv +import itertools + +from gn2.base import data_set +from gn2.base.trait import create_trait, retrieve_sample_data + +from gn2.utility import helper_functions +from gn2.utility.tools import locate, GN2_BRANCH_URL + +from rpy2.robjects.packages import importr + + +# Get pointers to some common R functions +r_library = ro.r["library"] # Map the library function +r_options = ro.r["options"] # Map the options function +r_t = ro.r["t"] # Map the t function +r_unlist = ro.r["unlist"] # Map the unlist function +r_list = ro.r.list # Map the list function +r_png = ro.r["png"] # Map the png function for plotting +r_dev_off = ro.r["dev.off"] # Map the dev.off function +r_write_table = ro.r["write.table"] # Map the write.table function +r_data_frame = ro.r["data.frame"] # Map the write.table function +r_as_numeric = ro.r["as.numeric"] # Map the write.table function + + +class CTL: + def __init__(self): + # Load CTL - Should only be done once, since it is quite expensive + r_library("ctl") + r_options(stringsAsFactors=False) + # Map the CTLscan function + self.r_CTLscan = ro.r["CTLscan"] + # Map the CTLsignificant function + self.r_CTLsignificant = ro.r["CTLsignificant"] + # Map the ctl.lineplot function + self.r_lineplot = ro.r["ctl.lineplot"] + # Map the CTLsignificant function + self.r_plotCTLobject = ro.r["plot.CTLobject"] + self.nodes_list = [] + self.edges_list = [] + + self.gn2_url = GN2_BRANCH_URL + + def addNode(self, gt): + node_dict = {'data': {'id': str(gt.name) + ":" + str(gt.dataset.name), + 'sid': str(gt.name), + 'dataset': str(gt.dataset.name), + 'label': gt.name, + 'symbol': gt.symbol, + 'geneid': gt.geneid, + 'omim': gt.omim}} + self.nodes_list.append(node_dict) + + def addEdge(self, gtS, gtT, significant, x): + edge_data = {'id': str(gtS.symbol) + '_' + significant[1][x] + '_' + str(gtT.symbol), + 'source': str(gtS.name) + ":" + str(gtS.dataset.name), + 'target': str(gtT.name) + ":" + str(gtT.dataset.name), + 'lod': significant[3][x], + 'color': "#ff0000", + 'width': significant[3][x]} + edge_dict = {'data': edge_data} + self.edges_list.append(edge_dict) + + def run_analysis(self, requestform): + self.trait_db_list = [trait.strip() + for trait in requestform['trait_list'].split(',')] + self.trait_db_list = [x for x in self.trait_db_list if x] + strategy = requestform.get("strategy") + nperm = int(requestform.get("nperm")) + parametric = bool(requestform.get("parametric")) + significance = float(requestform.get("significance")) + + # Get the name of the .geno file belonging to the first phenotype + datasetname = self.trait_db_list[0].split(":")[1] + dataset = data_set.create_dataset(datasetname) + + genofilelocation = locate(dataset.group.name + ".geno", "genotype") + parser = genofile_parser.ConvertGenoFile(genofilelocation) + parser.process_csv() + # Create a genotype matrix + individuals = parser.individuals + markers = [] + markernames = [] + for marker in parser.markers: + markernames.append(marker["name"]) + markers.append(marker["genotypes"]) + + genotypes = list(itertools.chain(*markers)) + + rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len( + individuals), dimnames=r_list(markernames, individuals), byrow=True)) + + # Create a phenotype matrix + traits = [] + for trait in self.trait_db_list: + if trait != "": + ts = trait.split(':') + gt = create_trait(name=ts[0], dataset_name=ts[1]) + gt = retrieve_sample_data(gt, dataset, individuals) + for ind in individuals: + if ind in list(gt.data.keys()): + traits.append(gt.data[ind].value) + else: + traits.append("-999") + + rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len( + individuals), dimnames=r_list(self.trait_db_list, individuals), byrow=True)) + + # Use a data frame to store the objects + rPheno = r_data_frame(rPheno, check_names=False) + rGeno = r_data_frame(rGeno, check_names=False) + + # Debug: Print the genotype and phenotype files to disk + #r_write_table(rGeno, "~/outputGN/geno.csv") + #r_write_table(rPheno, "~/outputGN/pheno.csv") + + # Perform the CTL scan + res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, + nperm=nperm, parametric=parametric, nthreads=6) + + # Get significant interactions + significant = self.r_CTLsignificant(res, significance=significance) + + # Create an image for output + self.results = {} + self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png" + self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1'] + + self.results['ctlresult'] = significant + # Store the user specified parameters for the output page + self.results['requestform'] = requestform + + # Create the lineplot + r_png(self.results['imgloc1'], width=1000, + height=600, type='cairo-png') + self.r_lineplot(res, significance=significance) + r_dev_off() + + # We start from 2, since R starts from 1 :) + n = 2 + for trait in self.trait_db_list: + # Create the QTL like CTL plots + self.results['imgurl' + \ + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png" + self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + \ + self.results['imgurl' + str(n)] + r_png(self.results['imgloc' + str(n)], + width=1000, height=600, type='cairo-png') + self.r_plotCTLobject( + res, (n - 1), significance=significance, main='Phenotype ' + trait) + r_dev_off() + n = n + 1 + + # Flush any output from R + sys.stdout.flush() + + # Create the interactive graph for cytoscape visualization (Nodes and Edges) + if not isinstance(significant, ri.RNULLType): + for x in range(len(significant[0])): + # Source + tsS = significant[0][x].split(':') + # Target + tsT = significant[2][x].split(':') + # Retrieve Source info from the DB + gtS = create_trait(name=tsS[0], dataset_name=tsS[1]) + # Retrieve Target info from the DB + gtT = create_trait(name=tsT[0], dataset_name=tsT[1]) + self.addNode(gtS) + self.addNode(gtT) + self.addEdge(gtS, gtT, significant, x) + + # Update the trait name for the displayed table + significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name) + # Update the trait name for the displayed table + significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name) + + self.elements = json.dumps(self.nodes_list + self.edges_list) + + def loadImage(self, path, name): + imgfile = open(self.results[path], 'rb') + imgdata = imgfile.read() + imgB64 = base64.b64encode(imgdata) + bytesarray = array.array('B', imgB64) + self.results[name] = bytesarray + + def render_image(self, results): + self.loadImage("imgloc1", "imgdata1") + n = 2 + for trait in self.trait_db_list: + self.loadImage("imgloc" + str(n), "imgdata" + str(n)) + n = n + 1 + + def process_results(self, results): + template_vars = {} + template_vars["results"] = self.results + template_vars["elements"] = self.elements + self.render_image(self.results) + sys.stdout.flush() + return(dict(template_vars)) diff --git a/gn2/wqflask/ctl/gn3_ctl_analysis.py b/gn2/wqflask/ctl/gn3_ctl_analysis.py new file mode 100644 index 00000000..64c2ff0d --- /dev/null +++ b/gn2/wqflask/ctl/gn3_ctl_analysis.py @@ -0,0 +1,132 @@ +import requests +import itertools + +from gn2.utility import genofile_parser +from gn2.utility.tools import GN3_LOCAL_URL +from gn2.utility.tools import locate + +from gn2.base.trait import create_trait +from gn2.base.trait import retrieve_sample_data +from gn2.base import data_set + + +def process_significance_data(dataset): + col_names = ["trait", "marker", "trait_2", "LOD", "dcor"] + dataset_rows = [[] for _ in range(len(dataset["trait"]))] + for col in col_names: + for (index, col_data) in enumerate(dataset[col]): + if col in ["dcor", "LOD"]: + dataset_rows[index].append(round(float(col_data), 2)) + else: + dataset_rows[index].append(col_data) + + return { + "col_names": col_names, + "data_set_rows": dataset_rows + } + + +def parse_geno_data(dataset_group_name) -> dict: + """ + Args: + dataset_group_name: string name + + @returns : dict with keys genotypes,markernames & individuals + """ + genofile_location = locate(dataset_group_name + ".geno", "genotype") + parser = genofile_parser.ConvertGenoFile(genofile_location) + parser.process_csv() + markers = [] + markernames = [] + for marker in parser.markers: + markernames.append(marker["name"]) + markers.append(marker["genotypes"]) + + return { + + "genotypes": list(itertools.chain(*markers)), + "markernames": markernames, + "individuals": parser.individuals + + + } + + +def parse_phenotype_data(trait_list, dataset, individuals): + """ + Args: + trait_list:list contains the traits + dataset: object + individuals:a list contains the individual vals + Returns: + traits_db_List:parsed list of traits + traits: list contains trait names + individuals + + """ + + traits = [] + for trait in trait_list: + if trait != "": + ts = trait.split(':') + gt = create_trait(name=ts[0], dataset_name=ts[1]) + gt = retrieve_sample_data(gt, dataset, individuals) + for ind in individuals: + if ind in list(gt.data.keys()): + traits.append(gt.data[ind].value) + else: + traits.append("-999") + + return { + "trait_db_list": trait_list, + "traits": traits, + "individuals": individuals + } + + +def parse_form_data(form_data: dict): + + trait_db_list = [trait.strip() + for trait in form_data['trait_list'].split(',')] + + form_data["trait_db_list"] = [x for x in trait_db_list if x] + form_data["nperm"] = int(form_data["nperm"]) + form_data["significance"] = float(form_data["significance"]) + form_data["strategy"] = form_data["strategy"].capitalize() + + return form_data + + +def run_ctl(requestform): + """function to make an api call + to gn3 and run ctl""" + ctl_api = f"{GN3_LOCAL_URL}/api/ctl/run_ctl" + + form_data = parse_form_data(requestform.to_dict()) + trait_db_list = form_data["trait_db_list"] + dataset = data_set.create_dataset(trait_db_list[0].split(":")[1]) + geno_data = parse_geno_data(dataset.group.name) + pheno_data = parse_phenotype_data( + trait_db_list, dataset, geno_data["individuals"]) + + try: + + response = requests.post(ctl_api, json={ + + "genoData": geno_data, + "phenoData": pheno_data, + **form_data, + + }) + if response.status_code != 200: + return {"error": response.json()} + response = response.json()["results"] + response["significance_data"] = process_significance_data( + response["significance_data"]) + + return response + + except requests.exceptions.ConnectionError: + return { + "error": "A connection error to perform computation occurred" + } |