From 83ff8ce678f15f2f6003a5800a75166544d7c6fa Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 9 Jul 2015 19:29:30 +0000 Subject: Mapping methods now check for existing genotype files. Currently we still usually get our samplelists from the genofile. This is dumb because it results in us having a bunch of "dummy" genofiles for certain data sets (seems to be mostly human ones). This means that checking for the genofile alone isn't enough to determine if a mapping method should exist for a given group. I wrote some code that will instead get the samplelist from the plink .fam file for some of these groups/datasets (if the .fam file exists). Ideally I would like to remove all of the dummy .geno files, but we can't yet do so because it's currently the only place we seem to be storing the sample list for some groups. I also moved gemma into the plink directory to get it out of the git tree. Since it uses the same files as plink, it doesn't make sense for it to be in its own separate directory. --- wqflask/maintenance/get_group_samplelists.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'wqflask/maintenance/get_group_samplelists.py') diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py index c9ec3872..3b3930ad 100755 --- a/wqflask/maintenance/get_group_samplelists.py +++ b/wqflask/maintenance/get_group_samplelists.py @@ -17,8 +17,13 @@ def process_genofiles(geno_dir=webqtlConfig.GENODIR): sample_list = get_samplelist(geno_file) -def get_samplelist(geno_file): - genofilename = os.path.join(webqtlConfig.GENODIR, geno_file) +def get_samplelist(file_type, geno_file): + if file_type == "geno": + return get_samplelist_from_geno(geno_file) + elif file_type == "plink": + return get_samplelist_from_plink(geno_file) + +def get_samplelist_from_geno(genofilename): if os.path.isfile(genofilename + '.gz'): genofilename += '.gz' genofile = gzip.open(genofilename) @@ -41,3 +46,12 @@ def 
get_samplelist(geno_file): samplelist = headers[3:] return samplelist +def get_samplelist_from_plink(genofilename): + genofile = open(genofilename) + + samplelist = [] + for line in genofile: + line = line.split("\t") + samplelist.append(line[0]) + + return samplelist \ No newline at end of file -- cgit v1.2.3 From ce77f735b36c2909ac79c86b673f057eded23f1a Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 9 Jul 2015 19:37:35 +0000 Subject: Forgot to add one change to the last commit --- wqflask/maintenance/get_group_samplelists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wqflask/maintenance/get_group_samplelists.py') diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py index 3b3930ad..b8397b47 100755 --- a/wqflask/maintenance/get_group_samplelists.py +++ b/wqflask/maintenance/get_group_samplelists.py @@ -51,7 +51,7 @@ def get_samplelist_from_plink(genofilename): samplelist = [] for line in genofile: - line = line.split("\t") + line = line.split(" ") samplelist.append(line[0]) return samplelist \ No newline at end of file -- cgit v1.2.3