about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x wqflask/base/data_set.py 48
-rw-r--r-- wqflask/maintenance/__init__.py 0
-rw-r--r-- wqflask/maintenance/get_group_samplelists.py 26
-rw-r--r-- wqflask/utility/helper_functions.py 5
-rwxr-xr-x wqflask/wqflask/show_trait/show_trait.py 2
-rw-r--r-- wqflask/wqflask/views.py 2
6 files changed, 38 insertions, 45 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index cf219fda..d5aae31d 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -43,6 +43,8 @@ from utility import webqtlUtil
from utility.benchmark import Bench
from wqflask.my_pylmm.pyLMM import chunks
+from maintenance import get_group_samplelists
+
from MySQLdb import escape_string as escape
from pprint import pformat as pf
@@ -258,35 +260,25 @@ class DatasetGroup(object):
if maternal and paternal:
self.parlist = [maternal, paternal]
- def get_sample_list(self):
- genofilename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno'))
- genofile = open(genofilename, "r")
- for line in genofile:
- line = line.strip()
- if line.startswith(("#", "@")):
- continue
- headline = line
- break
- headers = headline.split("\t")
- if headers[3] == "Mb":
- self.samplelist = headers[4:]
- else:
- self.samplelist = headers[3:]
-
- #if genotype_1.type == "group" and self.parlist:
- # genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1)
- #else:
- # genotype_2 = genotype_1
- #determine default genotype object
- #if self.incparentsf1 and genotype_1.type != "intercross":
- # genotype = genotype_2
- #else:
- # self.incparentsf1 = 0
- # genotype = genotype_1
+ def get_samplelist(self):
+ key = "samplelist:v4:" + self.name
+ print("key is:", key)
+ with Bench("Loading cache"):
+ result = Redis.get(key)
- #self.samplelist = list(genotype.prgy)
-
+ if result:
+ print("Sample List Cache hit!!!")
+ print("Before unjsonifying {}: {}".format(type(result), result))
+ self.samplelist = json.loads(result)
+ print(" type: ", type(self.samplelist))
+ print(" self.samplelist: ", self.samplelist)
+ else:
+ print("Cache not hit")
+ self.samplelist = get_group_samplelists.get_samplelist(self.name + ".geno")
+ print("after get_samplelist")
+ Redis.set(key, json.dumps(self.samplelist))
+ Redis.expire(key, 60*5)
def read_genotype_file(self):
'''Read genotype from .geno file instead of database'''
@@ -374,7 +366,7 @@ class DataSet(object):
self.retrieve_other_names()
self.group = DatasetGroup(self) # sets self.group and self.group_id and gets genotype
- self.group.read_genotype_file()
+ self.group.get_samplelist()
self.species = species.TheSpecies(self)
diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/wqflask/maintenance/__init__.py
diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py
index 2434038e..99e22904 100644
--- a/wqflask/maintenance/get_group_samplelists.py
+++ b/wqflask/maintenance/get_group_samplelists.py
@@ -7,37 +7,35 @@ import gzip
from base import webqtlConfig
-def get_sample_list_dir(geno_dir="/home/zas1024/gene/web/genotypes/"):
+def process_genofiles(geno_dir=webqtlConfig.GENODIR):
os.chdir(geno_dir)
-
- for group_file in glob.glob("*"):
- if group_file.lower().endswith(('.geno', '.geno.gz')):
+ for geno_file in glob.glob("*"):
+ if geno_file.lower().endswith(('.geno', '.geno.gz')):
#group_name = genofilename.split('.')[0]
- sample_list = get_sample_list(group_file)
- print("\n\n{}\n\n".format(sample_list))
+ sample_list = get_samplelist(geno_file)
-def get_sample_list(group_file):
- print(group_file)
- genofilename = str(os.path.join(webqtlConfig.GENODIR, group_file))
- if genofilename.lower().endswith('.geno.gz'):
+def get_samplelist(geno_file):
+ genofilename = os.path.join(webqtlConfig.GENODIR, geno_file)
+ if os.path.isfile(genofilename + '.gz'):
+ genofilename += '.gz'
genofile = gzip.open(genofilename)
else:
genofile = open(genofilename)
+
for line in genofile:
line = line.strip()
if not line:
continue
if line.startswith(("#", "@")):
continue
- headline = line
break
- headers = headline.split("\t")
+
+ headers = line.split()
+
if headers[3] == "Mb":
samplelist = headers[4:]
else:
samplelist = headers[3:]
return samplelist
-if __name__ == '__main__':
- get_sample_list_dir()
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index d76a32ce..44f5321e 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -8,11 +8,14 @@ from base.species import TheSpecies
def get_species_dataset_trait(self, start_vars):
#assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype"
self.dataset = data_set.create_dataset(start_vars['dataset'])
+ print("After creating dataset")
self.species = TheSpecies(dataset=self.dataset)
+ print("After creating species")
self.this_trait = GeneralTrait(dataset=self.dataset,
name=start_vars['trait_id'],
cellid=None)
+ print("After creating trait")
#if read_genotype:
- self.dataset.group.read_genotype_file()
+ #self.dataset.group.read_genotype_file()
#self.genotype = self.dataset.group.genotype
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 60e42afb..7397c776 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -41,7 +41,7 @@ class ShowTrait(object):
helper_functions.get_species_dataset_trait(self, kw)
- self.dataset.group.read_genotype_file()
+ #self.dataset.group.read_genotype_file()
# Todo: Add back in the ones we actually need from below, as we discover we need them
hddn = OrderedDict()
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 813075b8..bd8f5c86 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -96,7 +96,7 @@ def search_page():
else:
return render_template("data_sharing.html", **template_vars.__dict__)
else:
- key = "search_results:v2:" + json.dumps(request.args, sort_keys=True)
+ key = "search_results:v3:" + json.dumps(request.args, sort_keys=True)
print("key is:", pf(key))
with Bench("Loading cache"):
result = Redis.get(key)