about summary refs log tree commit diff
path: root/gn2/wqflask/ctl
diff options
context:
space:
mode:
authorArun Isaac2023-12-29 18:55:37 +0000
committerArun Isaac2023-12-29 19:01:46 +0000
commit204a308be0f741726b9a620d88fbc22b22124c81 (patch)
treeb3cf66906674020b530c844c2bb4982c8a0e2d39 /gn2/wqflask/ctl
parent83062c75442160427b50420161bfcae2c5c34c84 (diff)
downloadgenenetwork2-204a308be0f741726b9a620d88fbc22b22124c81.tar.gz
Namespace all modules under gn2.
We move all modules under a gn2 directory. This is important for
"correct" packaging and deployment as a Guix service.
Diffstat (limited to 'gn2/wqflask/ctl')
-rw-r--r--gn2/wqflask/ctl/__init__.py0
-rw-r--r--gn2/wqflask/ctl/ctl_analysis.py214
-rw-r--r--gn2/wqflask/ctl/gn3_ctl_analysis.py132
3 files changed, 346 insertions, 0 deletions
diff --git a/gn2/wqflask/ctl/__init__.py b/gn2/wqflask/ctl/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/gn2/wqflask/ctl/__init__.py
diff --git a/gn2/wqflask/ctl/ctl_analysis.py b/gn2/wqflask/ctl/ctl_analysis.py
new file mode 100644
index 00000000..513c1b1c
--- /dev/null
+++ b/gn2/wqflask/ctl/ctl_analysis.py
@@ -0,0 +1,214 @@
+# CTL analysis for GN2
+# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com>
+import sys
+from numpy import *
+import rpy2.robjects as ro                    # R Objects
+import rpy2.rinterface as ri
+
+import simplejson as json
+
+from gn2.base.webqtlConfig import GENERATED_IMAGE_DIR
+from gn2.utility import webqtlUtil                # Random number for the image
+from gn2.utility import genofile_parser           # genofile_parser
+
+import base64
+import array
+import csv
+import itertools
+
+from gn2.base import data_set
+from gn2.base.trait import create_trait, retrieve_sample_data
+
+from gn2.utility import helper_functions
+from gn2.utility.tools import locate, GN2_BRANCH_URL
+
+from rpy2.robjects.packages import importr
+
+
+# Get pointers to some common R functions
+r_library = ro.r["library"]             # Map the library function
+r_options = ro.r["options"]             # Map the options function
+r_t = ro.r["t"]                   # Map the t function
+r_unlist = ro.r["unlist"]              # Map the unlist function
+r_list = ro.r.list                   # Map the list function
+r_png = ro.r["png"]                 # Map the png function for plotting
+r_dev_off = ro.r["dev.off"]             # Map the dev.off function
+r_write_table = ro.r["write.table"]         # Map the write.table function
+r_data_frame = ro.r["data.frame"]         # Map the write.table function
+r_as_numeric = ro.r["as.numeric"]         # Map the write.table function
+
+
+class CTL:
+    def __init__(self):
+        # Load CTL - Should only be done once, since it is quite expensive
+        r_library("ctl")
+        r_options(stringsAsFactors=False)
+        # Map the CTLscan function
+        self.r_CTLscan = ro.r["CTLscan"]
+        # Map the CTLsignificant function
+        self.r_CTLsignificant = ro.r["CTLsignificant"]
+        # Map the ctl.lineplot function
+        self.r_lineplot = ro.r["ctl.lineplot"]
+        # Map the CTLsignificant function
+        self.r_plotCTLobject = ro.r["plot.CTLobject"]
+        self.nodes_list = []
+        self.edges_list = []
+
+        self.gn2_url = GN2_BRANCH_URL
+
+    def addNode(self, gt):
+        node_dict = {'data': {'id': str(gt.name) + ":" + str(gt.dataset.name),
+                              'sid': str(gt.name),
+                              'dataset': str(gt.dataset.name),
+                              'label': gt.name,
+                              'symbol': gt.symbol,
+                              'geneid': gt.geneid,
+                              'omim': gt.omim}}
+        self.nodes_list.append(node_dict)
+
+    def addEdge(self, gtS, gtT, significant, x):
+        edge_data = {'id': str(gtS.symbol) + '_' + significant[1][x] + '_' + str(gtT.symbol),
+                     'source': str(gtS.name) + ":" + str(gtS.dataset.name),
+                     'target': str(gtT.name) + ":" + str(gtT.dataset.name),
+                     'lod': significant[3][x],
+                     'color': "#ff0000",
+                     'width': significant[3][x]}
+        edge_dict = {'data': edge_data}
+        self.edges_list.append(edge_dict)
+
+    def run_analysis(self, requestform):
+        self.trait_db_list = [trait.strip()
+                              for trait in requestform['trait_list'].split(',')]
+        self.trait_db_list = [x for x in self.trait_db_list if x]
+        strategy = requestform.get("strategy")
+        nperm = int(requestform.get("nperm"))
+        parametric = bool(requestform.get("parametric"))
+        significance = float(requestform.get("significance"))
+
+        # Get the name of the .geno file belonging to the first phenotype
+        datasetname = self.trait_db_list[0].split(":")[1]
+        dataset = data_set.create_dataset(datasetname)
+
+        genofilelocation = locate(dataset.group.name + ".geno", "genotype")
+        parser = genofile_parser.ConvertGenoFile(genofilelocation)
+        parser.process_csv()
+        # Create a genotype matrix
+        individuals = parser.individuals
+        markers = []
+        markernames = []
+        for marker in parser.markers:
+            markernames.append(marker["name"])
+            markers.append(marker["genotypes"])
+
+        genotypes = list(itertools.chain(*markers))
+
+        rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(
+            individuals), dimnames=r_list(markernames, individuals), byrow=True))
+
+        # Create a phenotype matrix
+        traits = []
+        for trait in self.trait_db_list:
+            if trait != "":
+                ts = trait.split(':')
+                gt = create_trait(name=ts[0], dataset_name=ts[1])
+                gt = retrieve_sample_data(gt, dataset, individuals)
+                for ind in individuals:
+                    if ind in list(gt.data.keys()):
+                        traits.append(gt.data[ind].value)
+                    else:
+                        traits.append("-999")
+
+        rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(
+            individuals), dimnames=r_list(self.trait_db_list, individuals), byrow=True))
+
+        # Use a data frame to store the objects
+        rPheno = r_data_frame(rPheno, check_names=False)
+        rGeno = r_data_frame(rGeno, check_names=False)
+
+        # Debug: Print the genotype and phenotype files to disk
+        #r_write_table(rGeno, "~/outputGN/geno.csv")
+        #r_write_table(rPheno, "~/outputGN/pheno.csv")
+
+        # Perform the CTL scan
+        res = self.r_CTLscan(rGeno, rPheno, strategy=strategy,
+                             nperm=nperm, parametric=parametric, nthreads=6)
+
+        # Get significant interactions
+        significant = self.r_CTLsignificant(res, significance=significance)
+
+        # Create an image for output
+        self.results = {}
+        self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
+        self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']
+
+        self.results['ctlresult'] = significant
+        # Store the user specified parameters for the output page
+        self.results['requestform'] = requestform
+
+        # Create the lineplot
+        r_png(self.results['imgloc1'], width=1000,
+              height=600, type='cairo-png')
+        self.r_lineplot(res, significance=significance)
+        r_dev_off()
+
+        # We start from 2, since R starts from 1 :)
+        n = 2
+        for trait in self.trait_db_list:
+            # Create the QTL like CTL plots
+            self.results['imgurl' + \
+                         str(n)] = webqtlUtil.genRandStr("CTL_") + ".png"
+            self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + \
+                self.results['imgurl' + str(n)]
+            r_png(self.results['imgloc' + str(n)],
+                  width=1000, height=600, type='cairo-png')
+            self.r_plotCTLobject(
+                res, (n - 1), significance=significance, main='Phenotype ' + trait)
+            r_dev_off()
+            n = n + 1
+
+        # Flush any output from R
+        sys.stdout.flush()
+
+        # Create the interactive graph for cytoscape visualization (Nodes and Edges)
+        if not isinstance(significant, ri.RNULLType):
+            for x in range(len(significant[0])):
+                # Source
+                tsS = significant[0][x].split(':')
+                # Target
+                tsT = significant[2][x].split(':')
+                # Retrieve Source info from the DB
+                gtS = create_trait(name=tsS[0], dataset_name=tsS[1])
+                # Retrieve Target info from the DB
+                gtT = create_trait(name=tsT[0], dataset_name=tsT[1])
+                self.addNode(gtS)
+                self.addNode(gtT)
+                self.addEdge(gtS, gtT, significant, x)
+
+                # Update the trait name for the displayed table
+                significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name)
+                # Update the trait name for the displayed table
+                significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name)
+
+        self.elements = json.dumps(self.nodes_list + self.edges_list)
+
+    def loadImage(self, path, name):
+        imgfile = open(self.results[path], 'rb')
+        imgdata = imgfile.read()
+        imgB64 = base64.b64encode(imgdata)
+        bytesarray = array.array('B', imgB64)
+        self.results[name] = bytesarray
+
+    def render_image(self, results):
+        self.loadImage("imgloc1", "imgdata1")
+        n = 2
+        for trait in self.trait_db_list:
+            self.loadImage("imgloc" + str(n), "imgdata" + str(n))
+            n = n + 1
+
+    def process_results(self, results):
+        template_vars = {}
+        template_vars["results"] = self.results
+        template_vars["elements"] = self.elements
+        self.render_image(self.results)
+        sys.stdout.flush()
+        return(dict(template_vars))
diff --git a/gn2/wqflask/ctl/gn3_ctl_analysis.py b/gn2/wqflask/ctl/gn3_ctl_analysis.py
new file mode 100644
index 00000000..64c2ff0d
--- /dev/null
+++ b/gn2/wqflask/ctl/gn3_ctl_analysis.py
@@ -0,0 +1,132 @@
+import requests
+import itertools
+
+from gn2.utility import genofile_parser
+from gn2.utility.tools import GN3_LOCAL_URL
+from gn2.utility.tools import locate
+
+from gn2.base.trait import create_trait
+from gn2.base.trait import retrieve_sample_data
+from gn2.base import data_set
+
+
+def process_significance_data(dataset):
+    col_names = ["trait", "marker", "trait_2", "LOD", "dcor"]
+    dataset_rows = [[] for _ in range(len(dataset["trait"]))]
+    for col in col_names:
+        for (index, col_data) in enumerate(dataset[col]):
+            if col in ["dcor", "LOD"]:
+                dataset_rows[index].append(round(float(col_data), 2))
+            else:
+                dataset_rows[index].append(col_data)
+
+    return {
+        "col_names": col_names,
+        "data_set_rows": dataset_rows
+    }
+
+
+def parse_geno_data(dataset_group_name) -> dict:
+    """
+    Args:
+        dataset_group_name: string name
+
+    @returns : dict with keys genotypes,markernames & individuals
+    """
+    genofile_location = locate(dataset_group_name + ".geno", "genotype")
+    parser = genofile_parser.ConvertGenoFile(genofile_location)
+    parser.process_csv()
+    markers = []
+    markernames = []
+    for marker in parser.markers:
+        markernames.append(marker["name"])
+        markers.append(marker["genotypes"])
+
+    return {
+
+        "genotypes": list(itertools.chain(*markers)),
+        "markernames": markernames,
+        "individuals": parser.individuals
+
+
+    }
+
+
+def parse_phenotype_data(trait_list, dataset, individuals):
+    """
+    Args:
+        trait_list:list contains the traits
+        dataset:  object
+        individuals:a list contains the individual vals
+    Returns:
+           traits_db_List:parsed list of traits 
+           traits: list contains trait names
+           individuals
+
+    """
+
+    traits = []
+    for trait in trait_list:
+        if trait != "":
+            ts = trait.split(':')
+            gt = create_trait(name=ts[0], dataset_name=ts[1])
+            gt = retrieve_sample_data(gt, dataset, individuals)
+            for ind in individuals:
+                if ind in list(gt.data.keys()):
+                    traits.append(gt.data[ind].value)
+                else:
+                    traits.append("-999")
+
+    return {
+        "trait_db_list": trait_list,
+        "traits": traits,
+        "individuals": individuals
+    }
+
+
+def parse_form_data(form_data: dict):
+
+    trait_db_list = [trait.strip()
+                     for trait in form_data['trait_list'].split(',')]
+
+    form_data["trait_db_list"] = [x for x in trait_db_list if x]
+    form_data["nperm"] = int(form_data["nperm"])
+    form_data["significance"] = float(form_data["significance"])
+    form_data["strategy"] = form_data["strategy"].capitalize()
+
+    return form_data
+
+
+def run_ctl(requestform):
+    """function to make an api call
+    to gn3 and run ctl"""
+    ctl_api = f"{GN3_LOCAL_URL}/api/ctl/run_ctl"
+
+    form_data = parse_form_data(requestform.to_dict())
+    trait_db_list = form_data["trait_db_list"]
+    dataset = data_set.create_dataset(trait_db_list[0].split(":")[1])
+    geno_data = parse_geno_data(dataset.group.name)
+    pheno_data = parse_phenotype_data(
+        trait_db_list, dataset, geno_data["individuals"])
+
+    try:
+
+        response = requests.post(ctl_api, json={
+
+            "genoData": geno_data,
+            "phenoData": pheno_data,
+            **form_data,
+
+        })
+        if response.status_code != 200:
+            return {"error": response.json()}
+        response = response.json()["results"]
+        response["significance_data"] = process_significance_data(
+            response["significance_data"])
+
+        return response
+
+    except requests.exceptions.ConnectionError:
+        return {
+            "error": "A connection error to perform computation occurred"
+        }