Finished integrating code that reads sample list from geno files

without using reaper and caches results so it doesn't need to read the file every single time someone loads a page
author: Zachary Sloan 2013-07-19 17:34:52 -0500
committer: Zachary Sloan 2013-07-19 17:34:52 -0500
commit: 82f493650909e2351035e26e9dc82b16498beb48 (patch)
tree: bc66a571b7d805a75549526061b16c197cd6bc67 /wqflask
parent: 6aaefdaae3a9fb068278d9b94d8cdf25d4f8d852 (diff)
download: genenetwork2-82f493650909e2351035e26e9dc82b16498beb48.tar.gz
6 files changed, 38 insertions, 45 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index cf219fda..d5aae31d 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -43,6 +43,8 @@ from utility import webqtlUtil
 from utility.benchmark import Bench
 from wqflask.my_pylmm.pyLMM import chunks
 
+from maintenance import get_group_samplelists
+
 from MySQLdb import escape_string as escape
 from pprint import pformat as pf
 
@@ -258,35 +260,25 @@ class DatasetGroup(object):
         if maternal and paternal:
             self.parlist = [maternal, paternal]
 
-    def get_sample_list(self):
-        genofilename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno'))
-        genofile = open(genofilename, "r")
-        for line in genofile:
-            line = line.strip()
-            if line.startswith(("#", "@")):
-                continue
-            headline = line
-            break
-        headers = headline.split("\t")
-        if headers[3] == "Mb":
-            self.samplelist = headers[4:]
-        else:
-            self.samplelist = headers[3:]
-
-        #if genotype_1.type == "group" and self.parlist:
-        #    genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1])       #, F1=_f1)
-        #else:
-        #    genotype_2 = genotype_1 
 
-        #determine default genotype object
-        #if self.incparentsf1 and genotype_1.type != "intercross":
-        #    genotype = genotype_2
-        #else:
-        #    self.incparentsf1 = 0
-        #    genotype = genotype_1
+    def get_samplelist(self):
+        key = "samplelist:v4:" + self.name
+        print("key is:", key)
+        with Bench("Loading cache"):
+            result = Redis.get(key)
 
-        #self.samplelist = list(genotype.prgy)
-    
+        if result:
+            print("Sample List Cache hit!!!")
+            print("Before unjsonifying {}: {}".format(type(result), result))
+            self.samplelist = json.loads(result)
+            print("  type: ", type(self.samplelist))
+            print("  self.samplelist: ", self.samplelist)
+        else:
+            print("Cache not hit")
+            self.samplelist = get_group_samplelists.get_samplelist(self.name + ".geno")
+            print("after get_samplelist")
+            Redis.set(key, json.dumps(self.samplelist))
+            Redis.expire(key, 60*5)
 
     def read_genotype_file(self):
         '''Read genotype from .geno file instead of database'''
@@ -374,7 +366,7 @@ class DataSet(object):
         self.retrieve_other_names()
         
         self.group = DatasetGroup(self)   # sets self.group and self.group_id and gets genotype
-        self.group.read_genotype_file()
+        self.group.get_samplelist()
         self.species = species.TheSpecies(self)
 
 
diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/wqflask/maintenance/__init__.py
diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py
index 2434038e..99e22904 100644
--- a/wqflask/maintenance/get_group_samplelists.py
+++ b/wqflask/maintenance/get_group_samplelists.py
@@ -7,37 +7,35 @@ import gzip
 from base import webqtlConfig
 
 
-def get_sample_list_dir(geno_dir="/home/zas1024/gene/web/genotypes/"):
+def process_genofiles(geno_dir=webqtlConfig.GENODIR):
     os.chdir(geno_dir)
-    
-    for group_file in glob.glob("*"):
-        if group_file.lower().endswith(('.geno', '.geno.gz')):
+    for geno_file in glob.glob("*"):
+        if geno_file.lower().endswith(('.geno', '.geno.gz')):
             #group_name = genofilename.split('.')[0]
-            sample_list = get_sample_list(group_file)
-            print("\n\n{}\n\n".format(sample_list))
+            sample_list = get_samplelist(geno_file)
 
 
-def get_sample_list(group_file):
-    print(group_file)
-    genofilename = str(os.path.join(webqtlConfig.GENODIR, group_file))
-    if genofilename.lower().endswith('.geno.gz'):
+def get_samplelist(geno_file):
+    genofilename = os.path.join(webqtlConfig.GENODIR, geno_file)
+    if os.path.isfile(genofilename + '.gz'):
+        genofilename += '.gz'
         genofile = gzip.open(genofilename)
     else:
         genofile = open(genofilename)
+        
     for line in genofile:
         line = line.strip()
         if not line:
             continue
         if line.startswith(("#", "@")):
             continue
-        headline = line
         break
-    headers = headline.split("\t")
+    
+    headers = line.split()
+    
     if headers[3] == "Mb":
         samplelist = headers[4:]
     else:
         samplelist = headers[3:]
     return samplelist
 
-if __name__ == '__main__':
-    get_sample_list_dir()
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index d76a32ce..44f5321e 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -8,11 +8,14 @@ from base.species import TheSpecies
 def get_species_dataset_trait(self, start_vars):
     #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype"
     self.dataset = data_set.create_dataset(start_vars['dataset'])
+    print("After creating dataset")
     self.species = TheSpecies(dataset=self.dataset)
+    print("After creating species")
     self.this_trait = GeneralTrait(dataset=self.dataset,
                                    name=start_vars['trait_id'],
                                    cellid=None)
+    print("After creating trait")
 
     #if read_genotype:
-    self.dataset.group.read_genotype_file()
+    #self.dataset.group.read_genotype_file()
     #self.genotype = self.dataset.group.genotype
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 60e42afb..7397c776 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -41,7 +41,7 @@ class ShowTrait(object):
         
         helper_functions.get_species_dataset_trait(self, kw)
 
-        self.dataset.group.read_genotype_file()
+        #self.dataset.group.read_genotype_file()
 
         # Todo: Add back in the ones we actually need from below, as we discover we need them
         hddn = OrderedDict()
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 813075b8..bd8f5c86 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -96,7 +96,7 @@ def search_page():
         else:
             return render_template("data_sharing.html", **template_vars.__dict__)
     else:
-        key = "search_results:v2:" + json.dumps(request.args, sort_keys=True)
+        key = "search_results:v3:" + json.dumps(request.args, sort_keys=True)
         print("key is:", pf(key))
         with Bench("Loading cache"):
             result = Redis.get(key)
author	Zachary Sloan	2013-07-19 17:34:52 -0500
committer	Zachary Sloan	2013-07-19 17:34:52 -0500
commit	82f493650909e2351035e26e9dc82b16498beb48 (patch)
tree	bc66a571b7d805a75549526061b16c197cd6bc67 /wqflask
parent	6aaefdaae3a9fb068278d9b94d8cdf25d4f8d852 (diff)
download	genenetwork2-82f493650909e2351035e26e9dc82b16498beb48.tar.gz