diff options
author | zsloan | 2015-07-09 19:29:30 +0000 |
---|---|---|
committer | zsloan | 2015-07-09 19:29:30 +0000 |
commit | 83ff8ce678f15f2f6003a5800a75166544d7c6fa (patch) | |
tree | 63af8dcc71f4f2709ac2af2029a7471fd46597dc /wqflask | |
parent | 25b1d4c4aad2d44e0ff6bfa5aade1783b7a63120 (diff) | |
download | genenetwork2-83ff8ce678f15f2f6003a5800a75166544d7c6fa.tar.gz |
Mapping methods now check for existing genotype files.
Currently we still usually get our sample lists from the .geno file. This is
dumb because it leaves us with a bunch of "dummy" .geno files for certain
data sets (mostly human ones, it seems). As a result, checking for the
.geno file alone isn't enough to determine whether a mapping method should
be offered for a given group.
I wrote some code that instead gets the sample list from the PLINK .fam file
for some of these groups/datasets, if the .fam file exists. Ideally I would like to remove all of the dummy
.geno files, but we can't do so yet because they are currently the only place we seem to be storing
the sample list for some groups.
I also moved GEMMA into the PLINK directory to get it out of the git tree.
Since it uses the same files as PLINK, it doesn't make sense for it
to be in its own separate directory.
Diffstat (limited to 'wqflask')
-rwxr-xr-x | wqflask/base/data_set.py | 17 | ||||
-rwxr-xr-x | wqflask/base/webqtlConfig.py | 2 | ||||
-rwxr-xr-x | wqflask/maintenance/get_group_samplelists.py | 18 | ||||
-rw-r--r-- | wqflask/utility/tools.py | 17 | ||||
-rwxr-xr-x | wqflask/wqflask/show_trait/show_trait.py | 28 | ||||
-rwxr-xr-x | wqflask/wqflask/templates/show_trait_mapping_tools.html | 13 |
6 files changed, 82 insertions, 13 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 36f7d036..414cc71a 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -371,7 +371,7 @@ class DatasetGroup(object): self.parlist = [maternal, paternal] def get_samplelist(self): - key = "samplelist:v4:" + self.name + key = "samplelist:v2:" + self.name print("key is:", key) with Bench("Loading cache"): result = Redis.get(key) @@ -384,9 +384,18 @@ class DatasetGroup(object): print(" self.samplelist: ", self.samplelist) else: print("Cache not hit") - try: - self.samplelist = get_group_samplelists.get_samplelist(self.name + ".geno") - except IOError: + + from utility.tools import plink_command + PLINK_PATH,PLINK_COMMAND = plink_command() + + geno_file_path = webqtlConfig.GENODIR+self.name+".geno" + plink_file_path = PLINK_PATH+"/"+self.name+".fam" + + if os.path.isfile(plink_file_path): + self.samplelist = get_group_samplelists.get_samplelist("plink", plink_file_path) + elif os.path.isfile(geno_file_path): + self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path) + else: self.samplelist = None print("after get_samplelist") Redis.set(key, json.dumps(self.samplelist)) diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 48d8cd0a..330fec56 100755 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -53,7 +53,7 @@ GNROOT = "/home/zas1024/gene/" # Will remove this and dependent items later SECUREDIR = GNROOT + 'secure/' COMMON_LIB = GNROOT + 'support/admin' HTMLPATH = GNROOT + 'genotype_files/' -PYLMM_PATH = '/home/zas1024/plink/' +PYLMM_PATH = '/home/zas1024/plink_gemma/' SNP_PATH = '/home/zas1024/snps/' IMGDIR = GNROOT + '/wqflask/wqflask/images/' IMAGESPATH = HTMLPATH + 'images/' diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py index c9ec3872..3b3930ad 100755 --- a/wqflask/maintenance/get_group_samplelists.py +++ b/wqflask/maintenance/get_group_samplelists.py 
@@ -17,8 +17,13 @@ def process_genofiles(geno_dir=webqtlConfig.GENODIR): sample_list = get_samplelist(geno_file) -def get_samplelist(geno_file): - genofilename = os.path.join(webqtlConfig.GENODIR, geno_file) +def get_samplelist(file_type, geno_file): + if file_type == "geno": + return get_samplelist_from_geno(geno_file) + elif file_type == "plink": + return get_samplelist_from_plink(geno_file) + +def get_samplelist_from_geno(genofilename): if os.path.isfile(genofilename + '.gz'): genofilename += '.gz' genofile = gzip.open(genofilename) @@ -41,3 +46,12 @@ def get_samplelist(geno_file): samplelist = headers[3:] return samplelist +def get_samplelist_from_plink(genofilename): + genofile = open(genofilename) + + samplelist = [] + for line in genofile: + line = line.split("\t") + samplelist.append(line[0]) + + return samplelist
\ No newline at end of file diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py index 6e35f00a..760ded7c 100644 --- a/wqflask/utility/tools.py +++ b/wqflask/utility/tools.py @@ -66,4 +66,19 @@ def plink_command(default=None): guess = os.environ.get('HOME')+'/plink' path = get_setting('PLINK_PATH',default,guess,get_valid_path) plink_command = path+'/plink' - return path,plink_command
\ No newline at end of file + return path,plink_command + +def gemma_command(default=None): + def get_valid_path(path): + """Test for a valid repository""" + if path: + sys.stderr.write("Trying PLINK_PATH in "+path+"\n") + if path and os.path.isfile(path+'/plink'): + return path + else: + None + + guess = os.environ.get('HOME')+'/plink' + path = get_setting('PLINK_PATH',default,guess,get_valid_path) + gemma_command = path+'/gemma' + return path, gemma_command
\ No newline at end of file diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 61305e9b..02472267 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -16,6 +16,7 @@ from base import webqtlConfig from base import webqtlCaseData from wqflask.show_trait.SampleList import SampleList from utility import webqtlUtil, Plot, Bunch, helper_functions +from utility.tools import pylmm_command, plink_command from base.trait import GeneralTrait from base import data_set from dbFunction import webqtlDatabaseFunction @@ -23,6 +24,9 @@ from basicStatistics import BasicStatisticsFunctions from pprint import pformat as pf +PYLMM_PATH,PYLMM_COMMAND = pylmm_command() +PLINK_PATH,PLINK_COMMAND = plink_command() + ############################################### # # Todo: Put in security to ensure that user has permission to access confidential data sets @@ -137,12 +141,36 @@ class ShowTrait(object): sample_lists = [group.sample_list for group in self.sample_groups] print("sample_lists is:", pf(sample_lists)) + self.get_mapping_methods() + js_data = dict(sample_group_types = self.sample_group_types, sample_lists = sample_lists, attribute_names = self.sample_groups[0].attributes, temp_uuid = self.temp_uuid) self.js_data = js_data + def get_mapping_methods(self): + '''Only display mapping methods when the dataset group's genotype file exists''' + def check_plink_gemma(): + if (os.path.isfile(PLINK_PATH+"/"+self.dataset.group.name+".bed") and + os.path.isfile(PLINK_PATH+"/"+self.dataset.group.name+".bim") and + os.path.isfile(PLINK_PATH+"/"+self.dataset.group.name+".fam") and + os.path.isfile(PLINK_PATH+"/"+self.dataset.group.name+".map")): + + return True + else: + return False + + def check_pylmm_rqtl(): + if os.path.isfile(webqtlConfig.GENODIR+self.dataset.group.name+".geno"): + return True + else: + return False + + self.use_plink_gemma = check_plink_gemma() + self.use_pylmm_rqtl = 
check_pylmm_rqtl() + + def read_data(self, include_f1=False): '''read user input data or from trait data and analysis form''' diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html index 1550647a..547da0d0 100755 --- a/wqflask/wqflask/templates/show_trait_mapping_tools.html +++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html @@ -1,20 +1,21 @@ <div> + {% if use_pylmm_rqtl or use_plink_gemma %} <div class="col-xs-6"> <div class="tabbable"> <!-- Only required for left/right tabs --> <ul class="nav nav-pills"> + {% if use_pylmm_rqtl and not use_plink_gemma %} <li class="active"> <a href="#pylmm" data-toggle="tab">pyLMM</a> </li> <li> <a href="#rqtl_geno" data-toggle="tab">rqtl</a> </li> - {% if dataset.group.species != 'human' %} <li> <a href="#interval_mapping" data-toggle="tab">Interval Mapping</a> </li> {% endif %} - {% if dataset.group.species == 'human' %} + {% if use_plink_gemma %} <li> <a href="#plink" data-toggle="tab">PLINK</a> </li> @@ -28,8 +29,8 @@ </ul> <div class="tab-content"> + {% if use_pylmm_rqtl %} <div class="tab-pane active" id="pylmm"> - <div style="padding: 20px" class="form-horizontal"> <div class="mapping_method_fields form-group"> <label for="mapping_permutations" class="col-xs-2 control-label">Permutations</label> @@ -160,7 +161,6 @@ </div> </div> </div> - {% if dataset.group.species != 'human' %} <div class="tab-pane" id="interval_mapping"> <div style="padding: 20px" class="form-horizontal"> <div class="mapping_method_fields form-group"> @@ -214,7 +214,7 @@ </div> </div> {% endif %} - {% if dataset.group.species == 'human' %} + {% if use_plink_gemma %} <div class="tab-pane" id="plink"> <div style="padding: 20px" class="form-horizontal"> <div class="mapping_method_fields form-group"> @@ -272,4 +272,7 @@ <div id="mapping_result_holder_wrapper" style="display:none;"> <div id="mapping_result_holder"></div> </div> + {% else %} + Mapping options are disabled for data 
not matched with genotypes. + {% endif %} </div> |