From 82f493650909e2351035e26e9dc82b16498beb48 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 19 Jul 2013 17:34:52 -0500 Subject: Finished integrating code that reads sample list from geno files without using reaper and caches results so it doesn't need to read the file every single time someone loads a page --- wqflask/maintenance/__init__.py | 0 wqflask/maintenance/get_group_samplelists.py | 26 ++++++++++++-------------- 2 files changed, 12 insertions(+), 14 deletions(-) create mode 100644 wqflask/maintenance/__init__.py (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py index 2434038e..99e22904 100644 --- a/wqflask/maintenance/get_group_samplelists.py +++ b/wqflask/maintenance/get_group_samplelists.py @@ -7,37 +7,35 @@ import gzip from base import webqtlConfig -def get_sample_list_dir(geno_dir="/home/zas1024/gene/web/genotypes/"): +def process_genofiles(geno_dir=webqtlConfig.GENODIR): os.chdir(geno_dir) - - for group_file in glob.glob("*"): - if group_file.lower().endswith(('.geno', '.geno.gz')): + for geno_file in glob.glob("*"): + if geno_file.lower().endswith(('.geno', '.geno.gz')): #group_name = genofilename.split('.')[0] - sample_list = get_sample_list(group_file) - print("\n\n{}\n\n".format(sample_list)) + sample_list = get_samplelist(geno_file) -def get_sample_list(group_file): - print(group_file) - genofilename = str(os.path.join(webqtlConfig.GENODIR, group_file)) - if genofilename.lower().endswith('.geno.gz'): + +def get_samplelist(geno_file): + genofilename = os.path.join(webqtlConfig.GENODIR, geno_file) + if os.path.isfile(genofilename + '.gz'): + genofilename += '.gz' genofile = gzip.open(genofilename) else: genofile = open(genofilename) + for line in genofile: line = line.strip() if not line: continue if line.startswith(("#", "@")): 
continue - headline = line break - headers = headline.split("\t") + + headers = line.split() + if headers[3] == "Mb": samplelist = headers[4:] else: samplelist = headers[3:] return samplelist -if __name__ == '__main__': - get_sample_list_dir() -- cgit v1.2.3