Adding code to do initial CTL mapping (working on the BXD)

author: DannyArends 2016-03-23 23:08:12 +0100
committer: Pjotr Prins 2016-04-20 10:17:51 +0000
commit: 210d8109a4171ddf6ea6a3f70a1bfbea7b327722 (patch)
tree: a655cc1eda7be8ffe0a2a71e27d37545e1c0675a /wqflask
parent: 3d505d997511cd8f7b9f14510059cb2983edc6d4 (diff)
download: genenetwork2-210d8109a4171ddf6ea6a3f70a1bfbea7b327722.tar.gz
1 files changed, 77 insertions, 22 deletions
diff --git a/wqflask/wqflask/ctl/ctl_analysis.py b/wqflask/wqflask/ctl/ctl_analysis.py
index 8a2f1954..f998dc59 100644
--- a/wqflask/wqflask/ctl/ctl_analysis.py
+++ b/wqflask/wqflask/ctl/ctl_analysis.py
@@ -13,6 +13,10 @@ from utility import genofile_parser           # genofile_parser
 import base64
 import array
 import csv
+import itertools
+
+from base import data_set
+from base import trait as TRAIT
 
 from utility import helper_functions
 from utility.tools import locate
@@ -26,9 +30,11 @@ r_options       = ro.r["options"]             # Map the options function
 r_read_csv      = ro.r["read.csv"]            # Map the read.csv function
 r_dim           = ro.r["dim"]                 # Map the dim function
 r_c             = ro.r["c"]                   # Map the c function
+r_t             = ro.r["t"]                   # Map the t function
 r_cat           = ro.r["cat"]                 # Map the cat function
 r_paste         = ro.r["paste"]               # Map the paste function
 r_unlist        = ro.r["unlist"]              # Map the unlist function
+r_head          = ro.r["head"]                # Map the unlist function
 r_unique        = ro.r["unique"]              # Map the unique function
 r_length        = ro.r["length"]              # Map the length function
 r_unlist        = ro.r["unlist"]              # Map the unlist function
@@ -42,6 +48,12 @@ r_is_NA         = ro.r["is.na"]               # Map the is.na function
 r_file          = ro.r["file"]                # Map the file function
 r_png           = ro.r["png"]                 # Map the png function for plotting
 r_dev_off       = ro.r["dev.off"]             # Map the dev.off function
+r_save_image    = ro.r["save.image"]          # Map the save.image function
+r_class         = ro.r["class"]               # Map the class function
+r_save          = ro.r["save"]                # Map the save function
+r_write_table   = ro.r["write.table"]         # Map the write.table function
+r_as_data_frame   = ro.r["as.data.frame"]         # Map the write.table function
+r_data_frame   = ro.r["data.frame"]         # Map the write.table function
 
 class CTL(object):
     def __init__(self):
@@ -61,31 +73,73 @@ class CTL(object):
 
     def run_analysis(self, requestform):
         print("Starting CTL analysis on dataset")
-
         self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
-        print("Retrieved phenotype data from database", requestform['trait_list'])
-
-        helper_functions.get_trait_db_obs(self, self.trait_db_list)
-
-        self.input = {}           # self.input contains the phenotype values we need to send to R
-        strains = []              # All the strains we have data for (contains duplicates)
-        traits  = []              # All the traits we have data for (should not contain duplicates)
-        genotypebasename = ""
-        for trait in self.trait_list:
-            traits.append(trait[0].name)
-            if genotypebasename == "":
-              genotypebasename = trait[1].group.name
-            self.input[trait[0].name] = {}
-            for strain in trait[0].data:
-                strains.append(strain)
-                self.input[trait[0].name][strain]  = trait[0].data[strain].value
-
-        genofilelocation = locate(genotypebasename + ".geno", "genotype")
+        self.trait_db_list = [x for x in self.trait_db_list if x]
+
+        datasetname = self.trait_db_list[0].split(":")[1]
+        dataset = data_set.create_dataset(datasetname)
+
+        genofilelocation = locate(dataset.group.name + ".geno", "genotype")
         parser = genofile_parser.ConvertGenoFile(genofilelocation)
         parser.process_csv()
-        print(parser.markers)
+
+        individuals = parser.individuals
+        markers = []
+        markernames = []
+        x = 1
+        for marker in parser.markers:
+          markernames.append(marker["name"])
+          markers.append(marker["genotypes"])
+          if x == 1:
+            print marker["genotypes"]
+
+          x = x +1
+  
+        genotypes = list(itertools.chain(*markers))
+        print(len(genotypes) / len(individuals), "==", len(parser.markers))
+
+        rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames = r_list(markernames, individuals), byrow=True))
+
+        print(r_dim(rGeno)) 
+        #self.trait_names = [trait.split(':')[0].strip() for trait in self.trait_db_list]
+        #print(self.trait_names)
+
+
+        traits = []
+        for trait in self.trait_db_list:
+          print("retrieving data for", trait)
+          if trait != "":
+            ts = trait.split(':')
+            gt = TRAIT.GeneralTrait(name = ts[0], dataset_name = ts[1])
+            gt.retrieve_sample_data(individuals)
+            for ind in individuals:
+              if ind in gt.data.keys():
+                traits.append(gt.data[ind].value)
+              else:
+                traits.append("-999")
+
+        print len(traits) / len(individuals), "==", len(self.trait_db_list)
+        rPheno = r_t(ro.r.matrix(r_unlist(traits), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames = r_list(self.trait_db_list, individuals), byrow=True))
+
+        rPheno = r_data_frame(rPheno)
+        rGeno = r_data_frame(rGeno)
+
+        print(r_class(rPheno))
+        print(r_class(rGeno))
+
+
+
+        r_write_table(rPheno, "~/pheno.csv")
+        r_write_table(rGeno,  "~/geno.csv")
+        res = self.r_CTLscan(rGeno, rPheno)
+
         self.results = {}
-        sys.stdout.flush()
+        self.results['imgurl'] = webqtlUtil.genRandStr("WGCNAoutput_") + ".png"
+        self.results['imgloc'] = GENERATED_IMAGE_DIR + self.results['imgurl']
+        r_png(self.results['imgloc'], width=1000, height=600)
+        self.r_lineplot(res, significance = 1)
+        r_dev_off()
+     #   sys.stdout.flush()
 
     def render_image(self, results):
         print("pre-loading imgage results:", self.results['imgloc'])
@@ -98,7 +152,8 @@ class CTL(object):
     def process_results(self, results):
         print("Processing CTL output")
         template_vars = {}
-        template_vars["input"] = self.input
+        template_vars["results"] = self.results
+        self.render_image(self.results)
         sys.stdout.flush()
         return(dict(template_vars))
author	DannyArends	2016-03-23 23:08:12 +0100
committer	Pjotr Prins	2016-04-20 10:17:51 +0000
commit	210d8109a4171ddf6ea6a3f70a1bfbea7b327722 (patch)
tree	a655cc1eda7be8ffe0a2a71e27d37545e1c0675a /wqflask
parent	3d505d997511cd8f7b9f14510059cb2983edc6d4 (diff)
download	genenetwork2-210d8109a4171ddf6ea6a3f70a1bfbea7b327722.tar.gz