diff options
author | Zachary Sloan | 2013-07-19 17:34:52 -0500 |
---|---|---|
committer | Zachary Sloan | 2013-07-19 17:34:52 -0500 |
commit | 82f493650909e2351035e26e9dc82b16498beb48 (patch) | |
tree | bc66a571b7d805a75549526061b16c197cd6bc67 /wqflask | |
parent | 6aaefdaae3a9fb068278d9b94d8cdf25d4f8d852 (diff) | |
download | genenetwork2-82f493650909e2351035e26e9dc82b16498beb48.tar.gz |
Finished integrating code that reads sample list from geno files
without using reaper and caches results so it doesn't need
to read the file every single time someone loads a page
Diffstat (limited to 'wqflask')
-rwxr-xr-x | wqflask/base/data_set.py | 48 | ||||
-rw-r--r-- | wqflask/maintenance/__init__.py | 0 | ||||
-rw-r--r-- | wqflask/maintenance/get_group_samplelists.py | 26 | ||||
-rw-r--r-- | wqflask/utility/helper_functions.py | 5 | ||||
-rwxr-xr-x | wqflask/wqflask/show_trait/show_trait.py | 2 | ||||
-rw-r--r-- | wqflask/wqflask/views.py | 2 |
6 files changed, 38 insertions, 45 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index cf219fda..d5aae31d 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -43,6 +43,8 @@ from utility import webqtlUtil from utility.benchmark import Bench from wqflask.my_pylmm.pyLMM import chunks +from maintenance import get_group_samplelists + from MySQLdb import escape_string as escape from pprint import pformat as pf @@ -258,35 +260,25 @@ class DatasetGroup(object): if maternal and paternal: self.parlist = [maternal, paternal] - def get_sample_list(self): - genofilename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno')) - genofile = open(genofilename, "r") - for line in genofile: - line = line.strip() - if line.startswith(("#", "@")): - continue - headline = line - break - headers = headline.split("\t") - if headers[3] == "Mb": - self.samplelist = headers[4:] - else: - self.samplelist = headers[3:] - - #if genotype_1.type == "group" and self.parlist: - # genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1) - #else: - # genotype_2 = genotype_1 - #determine default genotype object - #if self.incparentsf1 and genotype_1.type != "intercross": - # genotype = genotype_2 - #else: - # self.incparentsf1 = 0 - # genotype = genotype_1 + def get_samplelist(self): + key = "samplelist:v4:" + self.name + print("key is:", key) + with Bench("Loading cache"): + result = Redis.get(key) - #self.samplelist = list(genotype.prgy) - + if result: + print("Sample List Cache hit!!!") + print("Before unjsonifying {}: {}".format(type(result), result)) + self.samplelist = json.loads(result) + print(" type: ", type(self.samplelist)) + print(" self.samplelist: ", self.samplelist) + else: + print("Cache not hit") + self.samplelist = get_group_samplelists.get_samplelist(self.name + ".geno") + print("after get_samplelist") + Redis.set(key, json.dumps(self.samplelist)) + Redis.expire(key, 60*5) def read_genotype_file(self): '''Read genotype from .geno file instead of 
database''' @@ -374,7 +366,7 @@ class DataSet(object): self.retrieve_other_names() self.group = DatasetGroup(self) # sets self.group and self.group_id and gets genotype - self.group.read_genotype_file() + self.group.get_samplelist() self.species = species.TheSpecies(self) diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/wqflask/maintenance/__init__.py diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py index 2434038e..99e22904 100644 --- a/wqflask/maintenance/get_group_samplelists.py +++ b/wqflask/maintenance/get_group_samplelists.py @@ -7,37 +7,35 @@ import gzip from base import webqtlConfig -def get_sample_list_dir(geno_dir="/home/zas1024/gene/web/genotypes/"): +def process_genofiles(geno_dir=webqtlConfig.GENODIR): os.chdir(geno_dir) - - for group_file in glob.glob("*"): - if group_file.lower().endswith(('.geno', '.geno.gz')): + for geno_file in glob.glob("*"): + if geno_file.lower().endswith(('.geno', '.geno.gz')): #group_name = genofilename.split('.')[0] - sample_list = get_sample_list(group_file) - print("\n\n{}\n\n".format(sample_list)) + sample_list = get_samplelist(geno_file) -def get_sample_list(group_file): - print(group_file) - genofilename = str(os.path.join(webqtlConfig.GENODIR, group_file)) - if genofilename.lower().endswith('.geno.gz'): +def get_samplelist(geno_file): + genofilename = os.path.join(webqtlConfig.GENODIR, geno_file) + if os.path.isfile(genofilename + '.gz'): + genofilename += '.gz' genofile = gzip.open(genofilename) else: genofile = open(genofilename) + for line in genofile: line = line.strip() if not line: continue if line.startswith(("#", "@")): continue - headline = line break - headers = headline.split("\t") + + headers = line.split() + if headers[3] == "Mb": samplelist = headers[4:] else: samplelist = headers[3:] return samplelist -if __name__ == '__main__': - get_sample_list_dir() 
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index d76a32ce..44f5321e 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -8,11 +8,14 @@ from base.species import TheSpecies def get_species_dataset_trait(self, start_vars): #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" self.dataset = data_set.create_dataset(start_vars['dataset']) + print("After creating dataset") self.species = TheSpecies(dataset=self.dataset) + print("After creating species") self.this_trait = GeneralTrait(dataset=self.dataset, name=start_vars['trait_id'], cellid=None) + print("After creating trait") #if read_genotype: - self.dataset.group.read_genotype_file() + #self.dataset.group.read_genotype_file() #self.genotype = self.dataset.group.genotype diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 60e42afb..7397c776 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -41,7 +41,7 @@ class ShowTrait(object): helper_functions.get_species_dataset_trait(self, kw) - self.dataset.group.read_genotype_file() + #self.dataset.group.read_genotype_file() # Todo: Add back in the ones we actually need from below, as we discover we need them hddn = OrderedDict() diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 813075b8..bd8f5c86 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -96,7 +96,7 @@ def search_page(): else: return render_template("data_sharing.html", **template_vars.__dict__) else: - key = "search_results:v2:" + json.dumps(request.args, sort_keys=True) + key = "search_results:v3:" + json.dumps(request.args, sort_keys=True) print("key is:", pf(key)) with Bench("Loading cache"): result = Redis.get(key) |