diff options
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- | wqflask/maintenance/convert_geno_to_bimbam.py | 8 | ||||
-rw-r--r-- | wqflask/maintenance/gen_select_dataset.py | 78 | ||||
-rw-r--r-- | wqflask/maintenance/generate_kinship_from_bimbam.py | 6 | ||||
-rw-r--r-- | wqflask/maintenance/geno_to_json.py | 6 | ||||
-rw-r--r-- | wqflask/maintenance/get_group_samplelists.py | 2 |
5 files changed, 61 insertions, 39 deletions
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py index 45522705..528b98cf 100644 --- a/wqflask/maintenance/convert_geno_to_bimbam.py +++ b/wqflask/maintenance/convert_geno_to_bimbam.py @@ -140,6 +140,8 @@ class ConvertGenoFile(object): key, _separater, value = row.partition(':') key = key.strip() value = value.strip() + if key == "@filler": + raise EmptyConfigurations if key in self.haplotype_notation: self.configurations[value] = self.haplotype_notation[key] continue @@ -154,6 +156,8 @@ class ConvertGenoFile(object): if not input_file.endswith(('geno', '.geno.gz')): continue group_name = ".".join(input_file.split('.')[:-1]) + if group_name == "HSNIH-Palmer": + continue geno_output_file = os.path.join(new_directory, group_name + "_geno.txt") pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt") snp_output_file = os.path.join(new_directory, group_name + "_snps.txt") @@ -176,8 +180,8 @@ class ConvertGenoFile(object): break if __name__=="__main__": - Old_Geno_Directory = """/home/zas1024/genotype_files/genotype/""" - New_Geno_Directory = """/home/zas1024/genotype_files/genotype/bimbam/""" + Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" + New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam""" #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index 18b2dac9..647e58a2 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -80,7 +80,8 @@ def parse_db_uri(): def get_species(): """Build species list""" - Cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId") + #Cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId") + Cursor.execute("select Name, MenuName from Species order by OrderId") species = list(Cursor.fetchall()) return species @@ -120,10 +121,20 @@ def get_types(groups): else: types[species][group_name] = [("Genotypes", "Genotypes")] if group_name in types[species]: - types[species][group_name] += build_types(species, group_name) - else: - types[species][group_name] = build_types(species, group_name) - + types_list = build_types(species, group_name) + if len(types_list) > 0: + types[species][group_name] += types_list + else: + if not phenotypes_exist(group_name) and not genotypes_exist(group_name): + types[species].pop(group_name, None) + groups[species] = tuple(group for group in groups[species] if group[0] != group_name) + else: #ZS: This whole else statement might be unnecessary, need to check + types_list = build_types(species, group_name) + if len(types_list) > 0: + types[species][group_name] = types_list + else: + types[species].pop(group_name, None) + groups[species] = tuple(group for group in groups[species] if group[0] != group_name) return types @@ -187,7 +198,6 @@ def get_datasets(types): for species, group_dict in types.iteritems(): datasets[species] = {} for group, type_list in group_dict.iteritems(): - #print("type_list: ", type_list) datasets[species][group] = {} for type_name in type_list: these_datasets = build_datasets(species, group, type_name[0]) @@ -200,26 +210,31 @@ def get_datasets(types): def build_datasets(species, group, type_name): """Gets dataset names from database""" dataset_text = dataset_value = None + datasets = [] if type_name == "Phenotypes": - print("GROUP:", group) - Cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where + Cursor.execute("""select InfoFiles.GN_AccesionId, PublishFreeze.Name, PublishFreeze.FullName from InfoFiles, PublishFreeze, InbredSet where InbredSet.Name = '%s' and PublishFreeze.InbredSetId = InbredSet.Id and - InfoFiles.InfoPageName = PublishFreeze.Name and - PublishFreeze.public > 0 and - PublishFreeze.confidentiality < 1 order by - PublishFreeze.CreateTime desc""" % group) + InfoFiles.InfoPageName = PublishFreeze.Name order by + PublishFreeze.CreateTime asc""" % group) - results = Cursor.fetchone() - if results != None: - dataset_id = str(results[0]) + results = Cursor.fetchall() + if len(results) > 0: + for result in results: + print(result) + dataset_id = str(result[0]) + dataset_value = str(result[1]) + if group == 'MDP': + dataset_text = "Mouse Phenome Database" + else: + #dataset_text = "%s Phenotypes" % group + dataset_text = str(result[2]) + datasets.append((dataset_id, dataset_value, dataset_text)) else: dataset_id = "None" - dataset_value = "%sPublish" % group - if group == 'MDP': - dataset_text = "Mouse Phenome Database" - else: - dataset_text = "%s Published Phenotypes" % group + dataset_value = "%sPublish" % group + dataset_text = "%s Phenotypes" % group + datasets.append((dataset_id, dataset_value, dataset_text)) elif type_name == "Genotypes": Cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where @@ -237,10 +252,9 @@ def build_datasets(species, group, type_name): dataset_id = "None" dataset_value = "%sGeno" % group dataset_text = "%s Genotypes" % group + datasets.append((dataset_id, dataset_value, dataset_text)) - if dataset_value: - return [(dataset_id, dataset_value, dataset_text)] - else: + else: # for mRNA expression/ProbeSet Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and @@ -248,7 +262,7 @@ def build_datasets(species, group, type_name): ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '%s' and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and ProbeSetFreeze.confidentiality < 1 and ProbeSetFreeze.public > 0 order by - ProbeSetFreeze.OrderList asc""" % (species, group, type_name)) + ProbeSetFreeze.CreateTime desc""" % (species, group, type_name)) dataset_results = Cursor.fetchall() datasets = [] @@ -258,7 +272,7 @@ def build_datasets(species, group, type_name): this_dataset_info.append(str(info)) datasets.append(this_dataset_info) - return datasets + return datasets def main(): @@ -271,13 +285,13 @@ def main(): types = get_types(groups) datasets = get_datasets(types) - species.append(('All Species', 'All Species')) - groups['All Species'] = [('All Groups', 'All Groups')] - types['All Species'] = {} - types['All Species']['All Groups'] = [('Phenotypes', 'Phenotypes')] - datasets['All Species'] = {} - datasets['All Species']['All Groups'] = {} - datasets['All Species']['All Groups']['Phenotypes'] = [('All Phenotypes','All Phenotypes')] + #species.append(('All Species', 'All Species')) + #groups['All Species'] = [('All Groups', 'All Groups')] + #types['All Species'] = {} + #types['All Species']['All Groups'] = [('Phenotypes', 'Phenotypes')] + #datasets['All Species'] = {} + #datasets['All Species']['All Groups'] = {} + #datasets['All Species']['All Groups']['Phenotypes'] = [('All Phenotypes','All Phenotypes')] data = dict(species=species, groups=groups, diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py index f322341d..b53f5dda 100644 --- a/wqflask/maintenance/generate_kinship_from_bimbam.py +++ b/wqflask/maintenance/generate_kinship_from_bimbam.py @@ -32,6 +32,8 @@ class GenerateKinshipMatrices(object): if not input_file.endswith(('geno', '.geno.gz')): continue group_name = ".".join(input_file.split('.')[:-1]) + if group_name == "HSNIH-Palmer": + continue geno_input_file = os.path.join(bimbam_dir, group_name + "_geno.txt") pheno_input_file = os.path.join(bimbam_dir, group_name + "_pheno.txt") convertob = GenerateKinshipMatrices(group_name, geno_input_file, pheno_input_file) @@ -52,8 +54,8 @@ class GenerateKinshipMatrices(object): if __name__=="__main__": - Geno_Directory = """/home/zas1024/genotype_files/genotype/""" - Bimbam_Directory = """/home/zas1024/genotype_files/genotype/bimbam/""" + Geno_Directory = """/export/local/home/zas1024/genotype_files/genotype/""" + Bimbam_Directory = """/export/local/home/zas1024/genotype_files/genotype/bimbam/""" GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory) #./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD
\ No newline at end of file diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py index 789a1691..9579812a 100644 --- a/wqflask/maintenance/geno_to_json.py +++ b/wqflask/maintenance/geno_to_json.py @@ -24,6 +24,8 @@ import simplejson as json from pprint import pformat as pf +#from utility.tools import flat_files + class EmptyConfigurations(Exception): pass @@ -183,8 +185,8 @@ class ConvertGenoFile(object): if __name__=="__main__": - Old_Geno_Directory = """/home/zas1024/genotype_files/genotype/""" - New_Geno_Directory = """/home/zas1024/genotype_files/genotype/json/""" + Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" + New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json""" #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py index 1dc6c46c..fb22898a 100644 --- a/wqflask/maintenance/get_group_samplelists.py +++ b/wqflask/maintenance/get_group_samplelists.py @@ -27,7 +27,7 @@ def get_samplelist_from_geno(genofilename): continue break - headers = line.split() + headers = line.split("\t") if headers[3] == "Mb": samplelist = headers[4:] |