about | summary | refs | log | tree | commit | diff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- wqflask/maintenance/convert_geno_to_bimbam.py | 8
-rw-r--r-- wqflask/maintenance/gen_select_dataset.py | 78
-rw-r--r-- wqflask/maintenance/generate_kinship_from_bimbam.py | 6
-rw-r--r-- wqflask/maintenance/geno_to_json.py | 6
-rw-r--r-- wqflask/maintenance/get_group_samplelists.py | 2
5 files changed, 61 insertions, 39 deletions
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py
index 45522705..528b98cf 100644
--- a/wqflask/maintenance/convert_geno_to_bimbam.py
+++ b/wqflask/maintenance/convert_geno_to_bimbam.py
@@ -140,6 +140,8 @@ class ConvertGenoFile(object):
key, _separater, value = row.partition(':')
key = key.strip()
value = value.strip()
+ if key == "@filler":
+ raise EmptyConfigurations
if key in self.haplotype_notation:
self.configurations[value] = self.haplotype_notation[key]
continue
@@ -154,6 +156,8 @@ class ConvertGenoFile(object):
if not input_file.endswith(('geno', '.geno.gz')):
continue
group_name = ".".join(input_file.split('.')[:-1])
+ if group_name == "HSNIH-Palmer":
+ continue
geno_output_file = os.path.join(new_directory, group_name + "_geno.txt")
pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt")
snp_output_file = os.path.join(new_directory, group_name + "_snps.txt")
@@ -176,8 +180,8 @@ class ConvertGenoFile(object):
break
if __name__=="__main__":
- Old_Geno_Directory = """/home/zas1024/genotype_files/genotype/"""
- New_Geno_Directory = """/home/zas1024/genotype_files/genotype/bimbam/"""
+ Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
+ New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam"""
#Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
#Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
#convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index 18b2dac9..647e58a2 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -80,7 +80,8 @@ def parse_db_uri():
def get_species():
"""Build species list"""
- Cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId")
+ #Cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId")
+ Cursor.execute("select Name, MenuName from Species order by OrderId")
species = list(Cursor.fetchall())
return species
@@ -120,10 +121,20 @@ def get_types(groups):
else:
types[species][group_name] = [("Genotypes", "Genotypes")]
if group_name in types[species]:
- types[species][group_name] += build_types(species, group_name)
- else:
- types[species][group_name] = build_types(species, group_name)
-
+ types_list = build_types(species, group_name)
+ if len(types_list) > 0:
+ types[species][group_name] += types_list
+ else:
+ if not phenotypes_exist(group_name) and not genotypes_exist(group_name):
+ types[species].pop(group_name, None)
+ groups[species] = tuple(group for group in groups[species] if group[0] != group_name)
+ else: #ZS: This whole else statement might be unnecessary, need to check
+ types_list = build_types(species, group_name)
+ if len(types_list) > 0:
+ types[species][group_name] = types_list
+ else:
+ types[species].pop(group_name, None)
+ groups[species] = tuple(group for group in groups[species] if group[0] != group_name)
return types
@@ -187,7 +198,6 @@ def get_datasets(types):
for species, group_dict in types.iteritems():
datasets[species] = {}
for group, type_list in group_dict.iteritems():
- #print("type_list: ", type_list)
datasets[species][group] = {}
for type_name in type_list:
these_datasets = build_datasets(species, group, type_name[0])
@@ -200,26 +210,31 @@ def get_datasets(types):
def build_datasets(species, group, type_name):
"""Gets dataset names from database"""
dataset_text = dataset_value = None
+ datasets = []
if type_name == "Phenotypes":
- print("GROUP:", group)
- Cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where
+ Cursor.execute("""select InfoFiles.GN_AccesionId, PublishFreeze.Name, PublishFreeze.FullName from InfoFiles, PublishFreeze, InbredSet where
InbredSet.Name = '%s' and
PublishFreeze.InbredSetId = InbredSet.Id and
- InfoFiles.InfoPageName = PublishFreeze.Name and
- PublishFreeze.public > 0 and
- PublishFreeze.confidentiality < 1 order by
- PublishFreeze.CreateTime desc""" % group)
+ InfoFiles.InfoPageName = PublishFreeze.Name order by
+ PublishFreeze.CreateTime asc""" % group)
- results = Cursor.fetchone()
- if results != None:
- dataset_id = str(results[0])
+ results = Cursor.fetchall()
+ if len(results) > 0:
+ for result in results:
+ print(result)
+ dataset_id = str(result[0])
+ dataset_value = str(result[1])
+ if group == 'MDP':
+ dataset_text = "Mouse Phenome Database"
+ else:
+ #dataset_text = "%s Phenotypes" % group
+ dataset_text = str(result[2])
+ datasets.append((dataset_id, dataset_value, dataset_text))
else:
dataset_id = "None"
- dataset_value = "%sPublish" % group
- if group == 'MDP':
- dataset_text = "Mouse Phenome Database"
- else:
- dataset_text = "%s Published Phenotypes" % group
+ dataset_value = "%sPublish" % group
+ dataset_text = "%s Phenotypes" % group
+ datasets.append((dataset_id, dataset_value, dataset_text))
elif type_name == "Genotypes":
Cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where
@@ -237,10 +252,9 @@ def build_datasets(species, group, type_name):
dataset_id = "None"
dataset_value = "%sGeno" % group
dataset_text = "%s Genotypes" % group
+ datasets.append((dataset_id, dataset_value, dataset_text))
- if dataset_value:
- return [(dataset_id, dataset_value, dataset_text)]
- else:
+ else: # for mRNA expression/ProbeSet
Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and
@@ -248,7 +262,7 @@ def build_datasets(species, group, type_name):
ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '%s' and
ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and
ProbeSetFreeze.confidentiality < 1 and ProbeSetFreeze.public > 0 order by
- ProbeSetFreeze.OrderList asc""" % (species, group, type_name))
+ ProbeSetFreeze.CreateTime desc""" % (species, group, type_name))
dataset_results = Cursor.fetchall()
datasets = []
@@ -258,7 +272,7 @@ def build_datasets(species, group, type_name):
this_dataset_info.append(str(info))
datasets.append(this_dataset_info)
- return datasets
+ return datasets
def main():
@@ -271,13 +285,13 @@ def main():
types = get_types(groups)
datasets = get_datasets(types)
- species.append(('All Species', 'All Species'))
- groups['All Species'] = [('All Groups', 'All Groups')]
- types['All Species'] = {}
- types['All Species']['All Groups'] = [('Phenotypes', 'Phenotypes')]
- datasets['All Species'] = {}
- datasets['All Species']['All Groups'] = {}
- datasets['All Species']['All Groups']['Phenotypes'] = [('All Phenotypes','All Phenotypes')]
+ #species.append(('All Species', 'All Species'))
+ #groups['All Species'] = [('All Groups', 'All Groups')]
+ #types['All Species'] = {}
+ #types['All Species']['All Groups'] = [('Phenotypes', 'Phenotypes')]
+ #datasets['All Species'] = {}
+ #datasets['All Species']['All Groups'] = {}
+ #datasets['All Species']['All Groups']['Phenotypes'] = [('All Phenotypes','All Phenotypes')]
data = dict(species=species,
groups=groups,
diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py
index f322341d..b53f5dda 100644
--- a/wqflask/maintenance/generate_kinship_from_bimbam.py
+++ b/wqflask/maintenance/generate_kinship_from_bimbam.py
@@ -32,6 +32,8 @@ class GenerateKinshipMatrices(object):
if not input_file.endswith(('geno', '.geno.gz')):
continue
group_name = ".".join(input_file.split('.')[:-1])
+ if group_name == "HSNIH-Palmer":
+ continue
geno_input_file = os.path.join(bimbam_dir, group_name + "_geno.txt")
pheno_input_file = os.path.join(bimbam_dir, group_name + "_pheno.txt")
convertob = GenerateKinshipMatrices(group_name, geno_input_file, pheno_input_file)
@@ -52,8 +54,8 @@ class GenerateKinshipMatrices(object):
if __name__=="__main__":
- Geno_Directory = """/home/zas1024/genotype_files/genotype/"""
- Bimbam_Directory = """/home/zas1024/genotype_files/genotype/bimbam/"""
+ Geno_Directory = """/export/local/home/zas1024/genotype_files/genotype/"""
+ Bimbam_Directory = """/export/local/home/zas1024/genotype_files/genotype/bimbam/"""
GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory)
#./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD
\ No newline at end of file
diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py
index 789a1691..9579812a 100644
--- a/wqflask/maintenance/geno_to_json.py
+++ b/wqflask/maintenance/geno_to_json.py
@@ -24,6 +24,8 @@ import simplejson as json
from pprint import pformat as pf
+#from utility.tools import flat_files
+
class EmptyConfigurations(Exception): pass
@@ -183,8 +185,8 @@ class ConvertGenoFile(object):
if __name__=="__main__":
- Old_Geno_Directory = """/home/zas1024/genotype_files/genotype/"""
- New_Geno_Directory = """/home/zas1024/genotype_files/genotype/json/"""
+ Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
+ New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json"""
#Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
#Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
#convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py
index 1dc6c46c..fb22898a 100644
--- a/wqflask/maintenance/get_group_samplelists.py
+++ b/wqflask/maintenance/get_group_samplelists.py
@@ -27,7 +27,7 @@ def get_samplelist_from_geno(genofilename):
continue
break
- headers = line.split()
+ headers = line.split("\t")
if headers[3] == "Mb":
samplelist = headers[4:]