Merge master and testing master

author: Pjotr Prins 2020-04-26 12:32:46 -0500
committer: Pjotr Prins 2020-04-26 12:32:46 -0500
commit: 14d49450c47e8ef3b34bca9e759acd371319a854 (patch)
tree: 3f45b7455ec114ebdd9fdd2f2c4a92c9eb3cf756 /wqflask/maintenance
parent: 33e2e09d00881de9b07274bc52b58587e5135cab (diff)
parent: 15d8e24da26cf6d42d1bdb8a9a189b9ec061d147 (diff)
download: genenetwork2-master.tar.gz
5 files changed, 61 insertions, 39 deletions
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py
index 45522705..528b98cf 100644
--- a/wqflask/maintenance/convert_geno_to_bimbam.py
+++ b/wqflask/maintenance/convert_geno_to_bimbam.py
@@ -140,6 +140,8 @@ class ConvertGenoFile(object):
                 key, _separater, value = row.partition(':')
                 key = key.strip()
                 value = value.strip()
+                if key == "@filler":
+                    raise EmptyConfigurations
                 if key in self.haplotype_notation:
                     self.configurations[value] = self.haplotype_notation[key]
                 continue
@@ -154,6 +156,8 @@ class ConvertGenoFile(object):
             if not input_file.endswith(('geno', '.geno.gz')):
                 continue
             group_name = ".".join(input_file.split('.')[:-1])
+            if group_name == "HSNIH-Palmer":
+                continue
             geno_output_file = os.path.join(new_directory, group_name + "_geno.txt")
             pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt")
             snp_output_file = os.path.join(new_directory, group_name + "_snps.txt")
@@ -176,8 +180,8 @@ class ConvertGenoFile(object):
                 break
 
 if __name__=="__main__":
-    Old_Geno_Directory = """/home/zas1024/genotype_files/genotype/"""
-    New_Geno_Directory = """/home/zas1024/genotype_files/genotype/bimbam/"""
+    Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
+    New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam"""
     #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
     #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
     #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index 18b2dac9..647e58a2 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -80,7 +80,8 @@ def parse_db_uri():
 
 def get_species():
     """Build species list"""
-    Cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId")
+    #Cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId")
+    Cursor.execute("select Name, MenuName from Species order by OrderId")
     species = list(Cursor.fetchall())
     return species
 
@@ -120,10 +121,20 @@ def get_types(groups):
                 else:
                     types[species][group_name] = [("Genotypes", "Genotypes")]
             if group_name in types[species]:
-                types[species][group_name] += build_types(species, group_name)
-            else:
-                types[species][group_name] = build_types(species, group_name)
-
+                types_list = build_types(species, group_name)
+                if len(types_list) > 0:
+                    types[species][group_name] += types_list
+                else:
+                    if not phenotypes_exist(group_name) and not genotypes_exist(group_name):
+                        types[species].pop(group_name, None)
+                        groups[species] = tuple(group for group in groups[species] if group[0] != group_name)
+            else: #ZS: This whole else statement might be unnecessary, need to check
+                types_list = build_types(species, group_name)
+                if len(types_list) > 0:
+                    types[species][group_name] = types_list
+                else:
+                    types[species].pop(group_name, None)
+                    groups[species] = tuple(group for group in groups[species] if group[0] != group_name)
     return types
 
 
@@ -187,7 +198,6 @@ def get_datasets(types):
     for species, group_dict in types.iteritems():
         datasets[species] = {}
         for group, type_list in group_dict.iteritems():
-            #print("type_list: ", type_list)
             datasets[species][group] = {}
             for type_name in type_list:
                 these_datasets = build_datasets(species, group, type_name[0])
@@ -200,26 +210,31 @@ def get_datasets(types):
 def build_datasets(species, group, type_name):
     """Gets dataset names from database"""
     dataset_text = dataset_value = None
+    datasets = []
     if type_name == "Phenotypes":
-        print("GROUP:", group)
-        Cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where
+        Cursor.execute("""select InfoFiles.GN_AccesionId, PublishFreeze.Name, PublishFreeze.FullName from InfoFiles, PublishFreeze, InbredSet where
                     InbredSet.Name = '%s' and
                     PublishFreeze.InbredSetId = InbredSet.Id and
-                    InfoFiles.InfoPageName = PublishFreeze.Name and
-                    PublishFreeze.public > 0 and
-                    PublishFreeze.confidentiality < 1 order by
-                    PublishFreeze.CreateTime desc""" % group)
+                    InfoFiles.InfoPageName = PublishFreeze.Name order by
+                    PublishFreeze.CreateTime asc""" % group)
 
-        results = Cursor.fetchone()
-        if results != None:
-            dataset_id = str(results[0])
+        results = Cursor.fetchall()
+        if len(results) > 0:
+            for result in results:
+                print(result)
+                dataset_id = str(result[0])
+                dataset_value = str(result[1])
+                if group == 'MDP':
+                    dataset_text = "Mouse Phenome Database"
+                else:
+                    #dataset_text = "%s Phenotypes" % group
+                    dataset_text = str(result[2])
+                datasets.append((dataset_id, dataset_value, dataset_text))
         else:
             dataset_id = "None"
-        dataset_value = "%sPublish" % group
-        if group == 'MDP':
-            dataset_text = "Mouse Phenome Database"
-        else:
-            dataset_text = "%s Published Phenotypes" % group
+            dataset_value = "%sPublish" % group
+            dataset_text = "%s Phenotypes" % group
+            datasets.append((dataset_id, dataset_value, dataset_text))
 
     elif type_name == "Genotypes":
         Cursor.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where
@@ -237,10 +252,9 @@ def build_datasets(species, group, type_name):
             dataset_id = "None"
         dataset_value = "%sGeno" % group
         dataset_text = "%s Genotypes" % group
+        datasets.append((dataset_id, dataset_value, dataset_text))
 
-    if dataset_value:
-        return [(dataset_id, dataset_value, dataset_text)]
-    else:
+    else: # for mRNA expression/ProbeSet
         Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
                     ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
                     Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and
@@ -248,7 +262,7 @@ def build_datasets(species, group, type_name):
                     ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '%s' and
                     ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and
                     ProbeSetFreeze.confidentiality < 1 and ProbeSetFreeze.public > 0 order by
-                    ProbeSetFreeze.OrderList asc""" % (species, group, type_name))
+                    ProbeSetFreeze.CreateTime desc""" % (species, group, type_name))
 
         dataset_results = Cursor.fetchall()
         datasets = []
@@ -258,7 +272,7 @@ def build_datasets(species, group, type_name):
                 this_dataset_info.append(str(info))
             datasets.append(this_dataset_info)
 
-        return datasets
+    return datasets
 
 
 def main():
@@ -271,13 +285,13 @@ def main():
     types = get_types(groups)
     datasets = get_datasets(types)
 
-    species.append(('All Species', 'All Species'))
-    groups['All Species'] = [('All Groups', 'All Groups')]
-    types['All Species'] = {}
-    types['All Species']['All Groups'] = [('Phenotypes', 'Phenotypes')]
-    datasets['All Species'] = {}
-    datasets['All Species']['All Groups'] = {}
-    datasets['All Species']['All Groups']['Phenotypes'] = [('All Phenotypes','All Phenotypes')]
+    #species.append(('All Species', 'All Species'))
+    #groups['All Species'] = [('All Groups', 'All Groups')]
+    #types['All Species'] = {}
+    #types['All Species']['All Groups'] = [('Phenotypes', 'Phenotypes')]
+    #datasets['All Species'] = {}
+    #datasets['All Species']['All Groups'] = {}
+    #datasets['All Species']['All Groups']['Phenotypes'] = [('All Phenotypes','All Phenotypes')]
 
     data = dict(species=species,
                 groups=groups,
diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py
index f322341d..b53f5dda 100644
--- a/wqflask/maintenance/generate_kinship_from_bimbam.py
+++ b/wqflask/maintenance/generate_kinship_from_bimbam.py
@@ -32,6 +32,8 @@ class GenerateKinshipMatrices(object):
             if not input_file.endswith(('geno', '.geno.gz')):
                 continue
             group_name = ".".join(input_file.split('.')[:-1])
+            if group_name == "HSNIH-Palmer":
+                continue
             geno_input_file = os.path.join(bimbam_dir, group_name + "_geno.txt")
             pheno_input_file = os.path.join(bimbam_dir, group_name + "_pheno.txt")
             convertob = GenerateKinshipMatrices(group_name, geno_input_file, pheno_input_file)
@@ -52,8 +54,8 @@ class GenerateKinshipMatrices(object):
     
     
 if __name__=="__main__":
-    Geno_Directory = """/home/zas1024/genotype_files/genotype/"""
-    Bimbam_Directory = """/home/zas1024/genotype_files/genotype/bimbam/"""
+    Geno_Directory = """/export/local/home/zas1024/genotype_files/genotype/"""
+    Bimbam_Directory = """/export/local/home/zas1024/genotype_files/genotype/bimbam/"""
     GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory)
     
     #./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD
\ No newline at end of file
diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py
index 789a1691..9579812a 100644
--- a/wqflask/maintenance/geno_to_json.py
+++ b/wqflask/maintenance/geno_to_json.py
@@ -24,6 +24,8 @@ import simplejson as json
 
 from pprint import pformat as pf
 
+#from utility.tools import flat_files
+
 class EmptyConfigurations(Exception): pass
 
         
@@ -183,8 +185,8 @@ class ConvertGenoFile(object):
 
 
 if __name__=="__main__":
-    Old_Geno_Directory = """/home/zas1024/genotype_files/genotype/"""
-    New_Geno_Directory = """/home/zas1024/genotype_files/genotype/json/"""
+    Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
+    New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json"""
     #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
     #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
     #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py
index 1dc6c46c..fb22898a 100644
--- a/wqflask/maintenance/get_group_samplelists.py
+++ b/wqflask/maintenance/get_group_samplelists.py
@@ -27,7 +27,7 @@ def get_samplelist_from_geno(genofilename):
             continue
         break
 
-    headers = line.split()
+    headers = line.split("\t")
 
     if headers[3] == "Mb":
         samplelist = headers[4:]
author	Pjotr Prins	2020-04-26 12:32:46 -0500
committer	Pjotr Prins	2020-04-26 12:32:46 -0500
commit	14d49450c47e8ef3b34bca9e759acd371319a854 (patch)
tree	3f45b7455ec114ebdd9fdd2f2c4a92c9eb3cf756 /wqflask/maintenance
parent	33e2e09d00881de9b07274bc52b58587e5135cab (diff)
parent	15d8e24da26cf6d42d1bdb8a9a189b9ec061d147 (diff)
download	genenetwork2-master.tar.gz