diff options
author | zsloan | 2015-07-09 19:29:30 +0000 |
---|---|---|
committer | zsloan | 2015-07-09 19:29:30 +0000 |
commit | 83ff8ce678f15f2f6003a5800a75166544d7c6fa (patch) | |
tree | 63af8dcc71f4f2709ac2af2029a7471fd46597dc /wqflask/base/data_set.py | |
parent | 25b1d4c4aad2d44e0ff6bfa5aade1783b7a63120 (diff) | |
download | genenetwork2-83ff8ce678f15f2f6003a5800a75166544d7c6fa.tar.gz |
Mapping methods now check for existing genotype files.
Currently we still usually get our sample lists from the .geno (genotype) file. This is
dumb because it results in us having a bunch of "dummy" .geno files for certain
data sets (mostly human ones, it seems). This means that checking for the
.geno file alone isn't enough to determine whether a mapping method should be
available for a given group.
I wrote some code that will instead get the sample list from the PLINK .fam file
for some of these groups/datasets (if the .fam file exists). Ideally I would like to remove all of the dummy
.geno files, but we can't do so yet because they are currently the only place where we seem to be storing
the sample list for some groups.
I also moved GEMMA into the PLINK directory to get it out of the git tree.
Since it uses the same files as PLINK, it doesn't make sense for it
to be in a separate directory.
Diffstat (limited to 'wqflask/base/data_set.py')
-rwxr-xr-x | wqflask/base/data_set.py | 17 |
1 files changed, 13 insertions, 4 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 36f7d036..414cc71a 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -371,7 +371,7 @@ class DatasetGroup(object): self.parlist = [maternal, paternal] def get_samplelist(self): - key = "samplelist:v4:" + self.name + key = "samplelist:v2:" + self.name print("key is:", key) with Bench("Loading cache"): result = Redis.get(key) @@ -384,9 +384,18 @@ class DatasetGroup(object): print(" self.samplelist: ", self.samplelist) else: print("Cache not hit") - try: - self.samplelist = get_group_samplelists.get_samplelist(self.name + ".geno") - except IOError: + + from utility.tools import plink_command + PLINK_PATH,PLINK_COMMAND = plink_command() + + geno_file_path = webqtlConfig.GENODIR+self.name+".geno" + plink_file_path = PLINK_PATH+"/"+self.name+".fam" + + if os.path.isfile(plink_file_path): + self.samplelist = get_group_samplelists.get_samplelist("plink", plink_file_path) + elif os.path.isfile(geno_file_path): + self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path) + else: self.samplelist = None print("after get_samplelist") Redis.set(key, json.dumps(self.samplelist)) |