Merge github.com:genenetwork/genenetwork2 into acenteno

author: Arthur Centeno 2021-10-25 21:04:23 +0000
committer: Arthur Centeno 2021-10-25 21:04:23 +0000
commit: 499a80f138030c4de1629c043c8f9401a99894ea (patch)
tree: 449dcae965d13f966fb6d52625fbc86661c8c6a0 /wqflask/maintenance
parent: 6151faa9ea67af4bf4ea95fb681a9dc4319474b6 (diff)
parent: 700802303e5e8221a9d591ba985d6607aa61e1ce (diff)
download: genenetwork2-499a80f138030c4de1629c043c8f9401a99894ea.tar.gz
10 files changed, 343 insertions, 145 deletions
diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py
index e833b395..18fbb8a1 100644
--- a/wqflask/maintenance/convert_dryad_to_bimbam.py
+++ b/wqflask/maintenance/convert_dryad_to_bimbam.py
@@ -6,7 +6,6 @@ Convert data dryad files to a BIMBAM _geno and _snps file
 
 """
 
-from __future__ import print_function, division, absolute_import
 import sys
 sys.path.append("..")
 
@@ -42,7 +41,7 @@ def read_dryad_file(filename):
 
     return geno_rows
 
-    #for i, marker in enumerate(marker_list):
+    # for i, marker in enumerate(marker_list):
     #    this_row = []
     #    this_row.append(marker)
     #    this_row.append("X")
@@ -53,18 +52,21 @@ def read_dryad_file(filename):
     #                this_row.append(line.split(" ")[i+2])
     #        print("row: " + str(i))
     #        geno_rows.append(this_row)
-    #            
-    #return geno_rows
+    #
+    # return geno_rows
+
 
 def write_bimbam_files(geno_rows):
     with open('/home/zas1024/cfw_data/CFW_geno.txt', 'w') as geno_fh:
         for row in geno_rows:
             geno_fh.write(", ".join(row) + "\n")
 
+
 def convert_dryad_to_bimbam(filename):
     geno_file_rows = read_dryad_file(filename)
     write_bimbam_files(geno_file_rows)
 
-if __name__=="__main__":
+
+if __name__ == "__main__":
     input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt"
-    convert_dryad_to_bimbam(input_filename)
\ No newline at end of file
+    convert_dryad_to_bimbam(input_filename)
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py
index 528b98cf..078be529 100644
--- a/wqflask/maintenance/convert_geno_to_bimbam.py
+++ b/wqflask/maintenance/convert_geno_to_bimbam.py
@@ -9,7 +9,6 @@ code
 
 """
 
-from __future__ import print_function, division, absolute_import
 import sys
 sys.path.append("..")
 import os
@@ -21,9 +20,12 @@ import simplejson as json
 
 from pprint import pformat as pf
 
-class EmptyConfigurations(Exception): pass
 
-class Marker(object):
+class EmptyConfigurations(Exception):
+    pass
+
+
+class Marker:
     def __init__(self):
         self.name = None
         self.chr = None
@@ -31,7 +33,8 @@ class Marker(object):
         self.Mb = None
         self.genotypes = []
 
-class ConvertGenoFile(object):
+
+class ConvertGenoFile:
 
     def __init__(self, input_file, output_files):
         self.input_file = input_file
@@ -53,7 +56,7 @@ class ConvertGenoFile(object):
             '@pat': "0",
             '@het': "0.5",
             '@unk': "NA"
-            }
+        }
 
         self.configurations = {}
         self.input_fh = open(self.input_file)
@@ -81,13 +84,14 @@ class ConvertGenoFile(object):
                 genotypes = row_items[2:]
             for item_count, genotype in enumerate(genotypes):
                 if genotype.upper().strip() in self.configurations:
-                    this_marker.genotypes.append(self.configurations[genotype.upper().strip()])
+                    this_marker.genotypes.append(
+                        self.configurations[genotype.upper().strip()])
                 else:
                     this_marker.genotypes.append("NA")
 
             self.markers.append(this_marker.__dict__)
 
-        self.write_to_bimbam()    
+        self.write_to_bimbam()
 
     def write_to_bimbam(self):
         with open(self.output_files[0], "w") as geno_fh:
@@ -104,9 +108,11 @@ class ConvertGenoFile(object):
         with open(self.output_files[2], "w") as snp_fh:
             for marker in self.markers:
                 if self.mb_exists:
-                    snp_fh.write(marker['name'] +", " + str(int(float(marker['Mb'])*1000000)) + ", " + marker['chr'] + "\n")
+                    snp_fh.write(
+                        marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n")
                 else:
-                    snp_fh.write(marker['name'] +", " + str(int(float(marker['cM'])*1000000)) + ", " + marker['chr'] + "\n")
+                    snp_fh.write(
+                        marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n")
 
     def get_sample_list(self, row_contents):
         self.sample_list = []
@@ -120,7 +126,7 @@ class ConvertGenoFile(object):
                 self.sample_list = row_contents[3:]
             else:
                 self.sample_list = row_contents[2:]
-    
+
     def process_rows(self):
         for self.latest_row_pos, row in enumerate(self.input_fh):
             self.latest_row_value = row
@@ -158,10 +164,14 @@ class ConvertGenoFile(object):
             group_name = ".".join(input_file.split('.')[:-1])
             if group_name == "HSNIH-Palmer":
                 continue
-            geno_output_file = os.path.join(new_directory, group_name + "_geno.txt")
-            pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt")
-            snp_output_file = os.path.join(new_directory, group_name + "_snps.txt")
-            output_files = [geno_output_file, pheno_output_file, snp_output_file]
+            geno_output_file = os.path.join(
+                new_directory, group_name + "_geno.txt")
+            pheno_output_file = os.path.join(
+                new_directory, group_name + "_pheno.txt")
+            snp_output_file = os.path.join(
+                new_directory, group_name + "_snps.txt")
+            output_files = [geno_output_file,
+                            pheno_output_file, snp_output_file]
             print("%s -> %s" % (
                 os.path.join(old_directory, input_file), geno_output_file))
             convertob = ConvertGenoFile(input_file, output_files)
@@ -174,17 +184,18 @@ class ConvertGenoFile(object):
                 print("  Exception:", why)
                 print(traceback.print_exc())
                 print("    Found in row %s at tabular column %s" % (convertob.latest_row_pos,
-                                                                convertob.latest_col_pos))
+                                                                    convertob.latest_col_pos))
                 print("    Column is:", convertob.latest_col_value)
                 print("    Row is:", convertob.latest_row_value)
                 break
 
-if __name__=="__main__":
+
+if __name__ == "__main__":
     Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
     New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam"""
     #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
     #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
     #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
-    #convertob.convert()
+    # convertob.convert()
     ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
-    #ConvertGenoFiles(Geno_Directory)
\ No newline at end of file
+    # ConvertGenoFiles(Geno_Directory)
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index 647e58a2..db65a11f 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -30,18 +30,10 @@ It needs to be run manually when database has been changed. Run it as
 #
 # This module is used by GeneNetwork project (www.genenetwork.org)
 
-from __future__ import print_function, division
-
-#from flask import config
-#
-#cdict = {}
-#config = config.Config(cdict).from_envvar('WQFLASK_SETTINGS')
-#print("cdict is:", cdict)
-
 import sys
 
 # NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead
-sys.path.insert(0,'./')
+sys.path.insert(0, './')
 # NEW: import app to avoid a circular dependency on utility.tools
 from wqflask import app
 
@@ -50,7 +42,7 @@ from utility.tools import locate, locate_ignore_error, TEMPDIR, SQL_URI
 import MySQLdb
 
 import simplejson as json
-import urlparse
+import urllib.parse
 
 
 #import sqlalchemy as sa
@@ -63,16 +55,17 @@ from pprint import pformat as pf
 
 #conn = Engine.connect()
 
+
 def parse_db_uri():
     """Converts a database URI to the db name, host name, user name, and password"""
 
-    parsed_uri = urlparse.urlparse(SQL_URI)
+    parsed_uri = urllib.parse.urlparse(SQL_URI)
 
     db_conn_info = dict(
-                        db = parsed_uri.path[1:],
-                        host = parsed_uri.hostname,
-                        user = parsed_uri.username,
-                        passwd = parsed_uri.password)
+        db=parsed_uri.path[1:],
+        host=parsed_uri.hostname,
+        user=parsed_uri.username,
+        passwd=parsed_uri.password)
 
     print(db_conn_info)
     return db_conn_info
@@ -108,7 +101,7 @@ def get_types(groups):
     """Build types list"""
     types = {}
     #print("Groups: ", pf(groups))
-    for species, group_dict in groups.iteritems():
+    for species, group_dict in list(groups.items()):
         types[species] = {}
         for group_name, _group_full_name in group_dict:
             # make group an alias to shorten the code
@@ -127,21 +120,23 @@ def get_types(groups):
                 else:
                     if not phenotypes_exist(group_name) and not genotypes_exist(group_name):
                         types[species].pop(group_name, None)
-                        groups[species] = tuple(group for group in groups[species] if group[0] != group_name)
-            else: #ZS: This whole else statement might be unnecessary, need to check
+                        groups[species] = tuple(
+                            group for group in groups[species] if group[0] != group_name)
+            else:  # ZS: This whole else statement might be unnecessary, need to check
                 types_list = build_types(species, group_name)
                 if len(types_list) > 0:
                     types[species][group_name] = types_list
                 else:
                     types[species].pop(group_name, None)
-                    groups[species] = tuple(group for group in groups[species] if group[0] != group_name)
+                    groups[species] = tuple(
+                        group for group in groups[species] if group[0] != group_name)
     return types
 
 
 def phenotypes_exist(group_name):
     #print("group_name:", group_name)
     Cursor.execute("""select Name from PublishFreeze
-                      where PublishFreeze.Name = '%s'""" % (group_name+"Publish"))
+                      where PublishFreeze.Name = '%s'""" % (group_name + "Publish"))
 
     results = Cursor.fetchone()
     #print("RESULTS:", results)
@@ -151,10 +146,11 @@ def phenotypes_exist(group_name):
     else:
         return False
 
+
 def genotypes_exist(group_name):
     #print("group_name:", group_name)
     Cursor.execute("""select Name from GenoFreeze
-                      where GenoFreeze.Name = '%s'""" % (group_name+"Geno"))
+                      where GenoFreeze.Name = '%s'""" % (group_name + "Geno"))
 
     results = Cursor.fetchone()
     #print("RESULTS:", results)
@@ -164,6 +160,7 @@ def genotypes_exist(group_name):
     else:
         return False
 
+
 def build_types(species, group):
     """Fetches tissues
 
@@ -192,12 +189,13 @@ def build_types(species, group):
 
     return results
 
+
 def get_datasets(types):
     """Build datasets list"""
     datasets = {}
-    for species, group_dict in types.iteritems():
+    for species, group_dict in list(types.items()):
         datasets[species] = {}
-        for group, type_list in group_dict.iteritems():
+        for group, type_list in list(group_dict.items()):
             datasets[species][group] = {}
             for type_name in type_list:
                 these_datasets = build_datasets(species, group, type_name[0])
@@ -254,7 +252,7 @@ def build_datasets(species, group, type_name):
         dataset_text = "%s Genotypes" % group
         datasets.append((dataset_id, dataset_value, dataset_text))
 
-    else: # for mRNA expression/ProbeSet
+    else:  # for mRNA expression/ProbeSet
         Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
                     ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
                     Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and
@@ -316,7 +314,8 @@ def _test_it():
     datasets = build_datasets("Mouse", "BXD", "Hippocampus")
     #print("build_datasets:", pf(datasets))
 
+
 if __name__ == '__main__':
     Conn = MySQLdb.Connect(**parse_db_uri())
     Cursor = Conn.cursor()
-    main()
\ No newline at end of file
+    main()
diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py
index b53f5dda..9f01d094 100644
--- a/wqflask/maintenance/generate_kinship_from_bimbam.py
+++ b/wqflask/maintenance/generate_kinship_from_bimbam.py
@@ -8,20 +8,22 @@ and uses GEMMA to generate their corresponding kinship/relatedness matrix file
 
 """
 
-from __future__ import print_function, division, absolute_import
 import sys
 sys.path.append("..")
 import os
 import glob
 
-class GenerateKinshipMatrices(object):
+
+class GenerateKinshipMatrices:
     def __init__(self, group_name, geno_file, pheno_file):
         self.group_name = group_name
         self.geno_file = geno_file
         self.pheno_file = pheno_file
-    
+
     def generate_kinship(self):
-        gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + " -p " + self.pheno_file + " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name
+        gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + \
+            " -p " + self.pheno_file + \
+            " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name
         print("command:", gemma_command)
         os.system(gemma_command)
 
@@ -34,9 +36,12 @@ class GenerateKinshipMatrices(object):
             group_name = ".".join(input_file.split('.')[:-1])
             if group_name == "HSNIH-Palmer":
                 continue
-            geno_input_file = os.path.join(bimbam_dir, group_name + "_geno.txt")
-            pheno_input_file = os.path.join(bimbam_dir, group_name + "_pheno.txt")
-            convertob = GenerateKinshipMatrices(group_name, geno_input_file, pheno_input_file)
+            geno_input_file = os.path.join(
+                bimbam_dir, group_name + "_geno.txt")
+            pheno_input_file = os.path.join(
+                bimbam_dir, group_name + "_pheno.txt")
+            convertob = GenerateKinshipMatrices(
+                group_name, geno_input_file, pheno_input_file)
             try:
                 convertob.generate_kinship()
             except EmptyConfigurations as why:
@@ -47,15 +52,15 @@ class GenerateKinshipMatrices(object):
                 print("  Exception:", why)
                 print(traceback.print_exc())
                 print("    Found in row %s at tabular column %s" % (convertob.latest_row_pos,
-                                                                convertob.latest_col_pos))
+                                                                    convertob.latest_col_pos))
                 print("    Column is:", convertob.latest_col_value)
                 print("    Row is:", convertob.latest_row_value)
                 break
-    
-    
-if __name__=="__main__":
+
+
+if __name__ == "__main__":
     Geno_Directory = """/export/local/home/zas1024/genotype_files/genotype/"""
     Bimbam_Directory = """/export/local/home/zas1024/genotype_files/genotype/bimbam/"""
     GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory)
-    
-    #./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD
\ No newline at end of file
+
+    # ./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD
diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py
index b7b2dc8e..e964c8ed 100644
--- a/wqflask/maintenance/generate_probesetfreeze_file.py
+++ b/wqflask/maintenance/generate_probesetfreeze_file.py
@@ -1,7 +1,5 @@
 #!/usr/bin/python
 
-from __future__ import absolute_import, print_function, division
-
 import sys
 
 # sys.path.insert(0, "..") - why?
@@ -25,10 +23,12 @@ def get_cursor():
     cursor = con.cursor()
     return cursor
 
+
 def show_progress(process, counter):
     if counter % 1000 == 0:
         print("{}: {}".format(process, counter))
 
+
 def get_strains(cursor):
     cursor.execute("""select Strain.Name
                       from Strain, StrainXRef, InbredSet
@@ -44,6 +44,7 @@ def get_strains(cursor):
 
     return strains
 
+
 def get_probeset_vals(cursor, dataset_name):
     cursor.execute(""" select ProbeSet.Id, ProbeSet.Name
                 from ProbeSetXRef,
@@ -79,10 +80,11 @@ def get_probeset_vals(cursor, dataset_name):
 
     return probeset_vals
 
+
 def trim_strains(strains, probeset_vals):
     trimmed_strains = []
     #print("probeset_vals is:", pf(probeset_vals))
-    first_probeset = list(probeset_vals.itervalues())[0]
+    first_probeset = list(probeset_vals.values())[0]
     print("\n**** first_probeset is:", pf(first_probeset))
     for strain in strains:
         print("\n**** strain is:", pf(strain))
@@ -91,6 +93,7 @@ def trim_strains(strains, probeset_vals):
     print("trimmed_strains:", pf(trimmed_strains))
     return trimmed_strains
 
+
 def write_data_matrix_file(strains, probeset_vals, filename):
     with open(filename, "wb") as fh:
         csv_writer = csv.writer(fh, delimiter=",", quoting=csv.QUOTE_ALL)
@@ -105,10 +108,12 @@ def write_data_matrix_file(strains, probeset_vals, filename):
             csv_writer.writerow(row_data)
             show_progress("Writing", counter)
 
+
 def main():
-    filename = os.path.expanduser("~/gene/wqflask/maintenance/" +
-                "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" +
-                "(Oct08)_RankInv_Beta.txt")
+    filename = os.path.expanduser(
+        "~/gene/wqflask/maintenance/"
+        "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2"
+        "(Oct08)_RankInv_Beta.txt")
     dataset_name = "Eye_AXBXA_1008_RankInv"
 
     cursor = get_cursor()
@@ -119,5 +124,6 @@ def main():
     trimmed_strains = trim_strains(strains, probeset_vals)
     write_data_matrix_file(trimmed_strains, probeset_vals, filename)
 
+
 if __name__ == '__main__':
     main()
diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py
index 9579812a..32e0e34b 100644
--- a/wqflask/maintenance/geno_to_json.py
+++ b/wqflask/maintenance/geno_to_json.py
@@ -9,7 +9,6 @@ code
 
 """
 
-from __future__ import print_function, division, absolute_import
 import sys
 sys.path.append("..")
 import os
@@ -26,11 +25,12 @@ from pprint import pformat as pf
 
 #from utility.tools import flat_files
 
-class EmptyConfigurations(Exception): pass
 
-        
+class EmptyConfigurations(Exception):
+    pass
 
-class Marker(object):
+
+class Marker:
     def __init__(self):
         self.name = None
         self.chr = None
@@ -38,23 +38,24 @@ class Marker(object):
         self.Mb = None
         self.genotypes = []
 
-class ConvertGenoFile(object):
+
+class ConvertGenoFile:
 
     def __init__(self, input_file, output_file):
-        
+
         self.input_file = input_file
         self.output_file = output_file
-        
+
         self.mb_exists = False
         self.cm_exists = False
         self.markers = []
-        
+
         self.latest_row_pos = None
         self.latest_col_pos = None
-        
+
         self.latest_row_value = None
         self.latest_col_value = None
-        
+
     def convert(self):
 
         self.haplotype_notation = {
@@ -62,24 +63,23 @@ class ConvertGenoFile(object):
             '@pat': "0",
             '@het': "0.5",
             '@unk': "NA"
-            }
-        
+        }
+
         self.configurations = {}
         #self.skipped_cols = 3
-        
-        #if self.input_file.endswith(".geno.gz"):
+
+        # if self.input_file.endswith(".geno.gz"):
         #    print("self.input_file: ", self.input_file)
         #    self.input_fh = gzip.open(self.input_file)
-        #else:
+        # else:
         self.input_fh = open(self.input_file)
-        
+
         with open(self.output_file, "w") as self.output_fh:
-            #if self.file_type == "geno":
+            # if self.file_type == "geno":
             self.process_csv()
-            #elif self.file_type == "snps":
+            # elif self.file_type == "snps":
             #    self.process_snps_file()
 
-
     def process_csv(self):
         for row_count, row in enumerate(self.process_rows()):
             row_items = row.split("\t")
@@ -101,31 +101,31 @@ class ConvertGenoFile(object):
                 genotypes = row_items[2:]
             for item_count, genotype in enumerate(genotypes):
                 if genotype.upper() in self.configurations:
-                    this_marker.genotypes.append(self.configurations[genotype.upper()])
+                    this_marker.genotypes.append(
+                        self.configurations[genotype.upper()])
                 else:
                     this_marker.genotypes.append("NA")
-                
-            #print("this_marker is:", pf(this_marker.__dict__))   
-            #if this_marker.chr == "14":
+
+            #print("this_marker is:", pf(this_marker.__dict__))
+            # if this_marker.chr == "14":
             self.markers.append(this_marker.__dict__)
 
         with open(self.output_file, 'w') as fh:
             json.dump(self.markers, fh, indent="   ", sort_keys=True)
-                
-                # print('configurations:', str(configurations))
-                #self.latest_col_pos = item_count + self.skipped_cols
-                #self.latest_col_value = item
-                
-                #if item_count != 0:
-                #    self.output_fh.write(" ")
-                #self.output_fh.write(self.configurations[item.upper()])
-                    
-            #self.output_fh.write("\n")
 
+            # print('configurations:', str(configurations))
+            #self.latest_col_pos = item_count + self.skipped_cols
+            #self.latest_col_value = item
+
+            # if item_count != 0:
+            #    self.output_fh.write(" ")
+            # self.output_fh.write(self.configurations[item.upper()])
+
+            # self.output_fh.write("\n")
 
     def process_rows(self):
         for self.latest_row_pos, row in enumerate(self.input_fh):
-            #if self.input_file.endswith(".geno.gz"):
+            # if self.input_file.endswith(".geno.gz"):
             #    print("row: ", row)
             self.latest_row_value = row
             # Take care of headers
@@ -172,26 +172,25 @@ class ConvertGenoFile(object):
                 print("  Exception:", why)
                 print(traceback.print_exc())
                 print("    Found in row %s at tabular column %s" % (convertob.latest_row_pos,
-                                                                convertob.latest_col_pos))
+                                                                    convertob.latest_col_pos))
                 print("    Column is:", convertob.latest_col_value)
                 print("    Row is:", convertob.latest_row_value)
                 break
-            
-    #def process_snps_file(cls, snps_file, new_directory):
+
+    # def process_snps_file(cls, snps_file, new_directory):
     #    output_file = os.path.join(new_directory, "mouse_families.json")
     #    print("%s -> %s" % (snps_file, output_file))
     #    convertob = ConvertGenoFile(input_file, output_file)
-        
 
 
-if __name__=="__main__":
+if __name__ == "__main__":
     Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
     New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json"""
     #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
     #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
     #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
-    #convertob.convert()
+    # convertob.convert()
     ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
-    #ConvertGenoFiles(Geno_Directory)
-    
-    #process_csv(Input_File, Output_File)
\ No newline at end of file
+    # ConvertGenoFiles(Geno_Directory)
+
+    #process_csv(Input_File, Output_File)
diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py
index fb22898a..0a450d3f 100644
--- a/wqflask/maintenance/get_group_samplelists.py
+++ b/wqflask/maintenance/get_group_samplelists.py
@@ -1,17 +1,17 @@
-from __future__ import absolute_import, print_function, division
-
 import os
 import glob
 import gzip
 
 from base import webqtlConfig
 
+
 def get_samplelist(file_type, geno_file):
     if file_type == "geno":
         return get_samplelist_from_geno(geno_file)
     elif file_type == "plink":
         return get_samplelist_from_plink(geno_file)
 
+
 def get_samplelist_from_geno(genofilename):
     if os.path.isfile(genofilename + '.gz'):
         genofilename += '.gz'
@@ -35,6 +35,7 @@ def get_samplelist_from_geno(genofilename):
         samplelist = headers[3:]
     return samplelist
 
+
 def get_samplelist_from_plink(genofilename):
     genofile = open(genofilename)
 
diff --git a/wqflask/maintenance/print_benchmark.py b/wqflask/maintenance/print_benchmark.py
index ae327cf3..9d12da8a 100644
--- a/wqflask/maintenance/print_benchmark.py
+++ b/wqflask/maintenance/print_benchmark.py
@@ -1,13 +1,11 @@
 #!/usr/bin/python
 
-from __future__ import absolute_import, print_function, division
-
 import time
 
 from pprint import pformat as pf
 
 
-class TheCounter(object):
+class TheCounter:
     Counters = {}
 
     def __init__(self):
@@ -17,15 +15,18 @@ class TheCounter(object):
         self.time_took = time.time() - start_time
         TheCounter.Counters[self.__class__.__name__] = self.time_took
 
+
 class PrintAll(TheCounter):
     def print_it(self, counter):
         print(counter)
 
+
 class PrintSome(TheCounter):
     def print_it(self, counter):
         if counter % 1000 == 0:
             print(counter)
 
+
 class PrintNone(TheCounter):
     def print_it(self, counter):
         pass
@@ -39,5 +40,6 @@ def new_main():
 
     print(pf(TheCounter.Counters))
 
+
 if __name__ == '__main__':
-    new_main()
\ No newline at end of file
+    new_main()
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
index 41a3aad8..0cc963e5 100644
--- a/wqflask/maintenance/quantile_normalize.py
+++ b/wqflask/maintenance/quantile_normalize.py
@@ -1,12 +1,7 @@
-from __future__ import absolute_import, print_function, division
-
 import sys
-sys.path.insert(0,'./')
-
-from itertools import izip
-
+sys.path.insert(0, './')
 import MySQLdb
-import urlparse
+import urllib.parse
 
 import numpy as np
 import pandas as pd
@@ -19,48 +14,54 @@ from wqflask import app
 from utility.elasticsearch_tools import get_elasticsearch_connection
 from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT, SQL_URI
 
+
 def parse_db_uri():
     """Converts a database URI to the db name, host name, user name, and password"""
 
-    parsed_uri = urlparse.urlparse(SQL_URI)
+    parsed_uri = urllib.parse.urlparse(SQL_URI)
 
     db_conn_info = dict(
-                        db = parsed_uri.path[1:],
-                        host = parsed_uri.hostname,
-                        user = parsed_uri.username,
-                        passwd = parsed_uri.password)
+        db=parsed_uri.path[1:],
+        host=parsed_uri.hostname,
+        user=parsed_uri.username,
+        passwd=parsed_uri.password)
 
     print(db_conn_info)
     return db_conn_info
 
+
 def create_dataframe(input_file):
     with open(input_file) as f:
         ncols = len(f.readline().split("\t"))
 
-    input_array = np.loadtxt(open(input_file, "rb"), delimiter="\t", skiprows=1, usecols=range(1, ncols))
+    input_array = np.loadtxt(open(
+        input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols)))
     return pd.DataFrame(input_array)
 
-#This function taken from https://github.com/ShawnLYU/Quantile_Normalize
+# This function taken from https://github.com/ShawnLYU/Quantile_Normalize
+
+
 def quantileNormalize(df_input):
     df = df_input.copy()
-    #compute rank
+    # compute rank
     dic = {}
     for col in df:
-        dic.update({col : sorted(df[col])})
+        dic.update({col: sorted(df[col])})
     sorted_df = pd.DataFrame(dic)
-    rank = sorted_df.mean(axis = 1).tolist()
-    #sort
+    rank = sorted_df.mean(axis=1).tolist()
+    # sort
     for col in df:
         t = np.searchsorted(np.sort(df[col]), df[col])
         df[col] = [rank[i] for i in t]
     return df
 
+
 def set_data(dataset_name):
     orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt"
 
     sample_list = []
     with open(orig_file, 'r') as orig_fh, open('/home/zas1024/cfw_data/quant_norm.csv', 'r') as quant_fh:
-        for i, (line1, line2) in enumerate(izip(orig_fh, quant_fh)):
+        for i, (line1, line2) in enumerate(zip(orig_fh, quant_fh)):
             trait_dict = {}
             sample_list = []
             if i == 0:
@@ -69,10 +70,10 @@ def set_data(dataset_name):
                 trait_name = line1.split('\t')[0]
                 for i, sample in enumerate(sample_names):
                     this_sample = {
-                                    "name": sample,
-                                    "value": line1.split('\t')[i+1],
-                                    "qnorm": line2.split('\t')[i+1]
-                                  }
+                        "name": sample,
+                        "value": line1.split('\t')[i + 1],
+                        "qnorm": line2.split('\t')[i + 1]
+                    }
                     sample_list.append(this_sample)
                 query = """SELECT Species.SpeciesName, InbredSet.InbredSetName, ProbeSetFreeze.FullName
                            FROM Species, InbredSet, ProbeSetFreeze, ProbeFreeze, ProbeSetXRef, ProbeSet
@@ -100,13 +101,14 @@ def set_data(dataset_name):
                     }
                 }
 
+
 if __name__ == '__main__':
     Conn = MySQLdb.Connect(**parse_db_uri())
     Cursor = Conn.cursor()
 
-    #es = Elasticsearch([{
+    # es = Elasticsearch([{
     #    "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT
-    #}], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None
+    # }], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None
 
     es = get_elasticsearch_connection(for_user=False)
 
@@ -121,9 +123,9 @@ if __name__ == '__main__':
     success, _ = bulk(es, set_data(sys.argv[1]))
 
     response = es.search(
-        index = "traits", doc_type = "trait", body = {
-            "query": { "match": { "name": "ENSMUSG00000028982" } }
+        index="traits", doc_type="trait", body={
+            "query": {"match": {"name": "ENSMUSG00000028982"}}
         }
     )
 
-    print(response)
\ No newline at end of file
+    print(response)
diff --git a/wqflask/maintenance/set_resource_defaults.py b/wqflask/maintenance/set_resource_defaults.py
new file mode 100644
index 00000000..0f472494
--- /dev/null
+++ b/wqflask/maintenance/set_resource_defaults.py
@@ -0,0 +1,171 @@
+"""
+
+Script that sets default resource access masks for use with the DB proxy
+
+Defaults will be:
+Owner - omni_gn
+Mask  - Public/non-confidential: { data: "view",
+                                   metadata: "view",
+                                   admin: "not-admin" }
+        Private/confidential:    { data: "no-access",
+                                   metadata: "no-access",
+                                   admin: "not-admin" }
+
+To run:
+./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/set_resource_defaults.py
+
+"""
+
+import sys
+import json
+
+# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead
+sys.path.insert(0, './')
+
+# NEW: import app to avoid a circular dependency on utility.tools
+from wqflask import app
+
+from utility import hmac
+from utility.tools import SQL_URI
+from utility.redis_tools import get_redis_conn, get_user_id, add_resource, get_resources, get_resource_info
+Redis = get_redis_conn()
+
+import MySQLdb
+
+import urllib.parse
+
+from utility.logger import getLogger
+logger = getLogger(__name__)
+
+
+def parse_db_uri():
+    """Converts a database URI to the db name, host name, user name, and password"""
+
+    parsed_uri = urllib.parse.urlparse(SQL_URI)
+
+    db_conn_info = dict(
+        db=parsed_uri.path[1:],
+        host=parsed_uri.hostname,
+        user=parsed_uri.username,
+        passwd=parsed_uri.password)
+
+    print(db_conn_info)
+    return db_conn_info
+
+
+def insert_probeset_resources(default_owner_id):
+    current_resources = Redis.hgetall("resources")
+    Cursor.execute("""  SELECT
+                            ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.confidentiality, ProbeSetFreeze.public
+                        FROM
+                            ProbeSetFreeze""")
+
+    resource_results = Cursor.fetchall()
+    for i, resource in enumerate(resource_results):
+        resource_ob = {}
+        resource_ob['name'] = resource[1]
+        resource_ob['owner_id'] = default_owner_id
+        resource_ob['data'] = {"dataset": str(resource[0])}
+        resource_ob['type'] = "dataset-probeset"
+        if resource[2] < 1 and resource[3] > 0:
+            resource_ob['default_mask'] = {"data": "view",
+                                           "metadata": "view",
+                                           "admin": "not-admin"}
+        else:
+            resource_ob['default_mask'] = {"data": "no-access",
+                                           "metadata": "no-access",
+                                           "admin": "not-admin"}
+        resource_ob['group_masks'] = {}
+
+        add_resource(resource_ob, update=False)
+
+
+def insert_publish_resources(default_owner_id):
+    current_resources = Redis.hgetall("resources")
+    Cursor.execute("""  SELECT 
+                            PublishXRef.Id, PublishFreeze.Id, InbredSet.InbredSetCode
+                        FROM
+                            PublishXRef, PublishFreeze, InbredSet, Publication
+                        WHERE
+                            PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
+                            InbredSet.Id = PublishXRef.InbredSetId AND
+                            Publication.Id = PublishXRef.PublicationId""")
+
+    resource_results = Cursor.fetchall()
+    for resource in resource_results:
+        if resource[2]:
+            resource_ob = {}
+            if resource[2]:
+                resource_ob['name'] = resource[2] + "_" + str(resource[0])
+            else:
+                resource_ob['name'] = str(resource[0])
+            resource_ob['owner_id'] = default_owner_id
+            resource_ob['data'] = {"dataset": str(resource[1]),
+                                   "trait": str(resource[0])}
+            resource_ob['type'] = "dataset-publish"
+            resource_ob['default_mask'] = {"data": "view",
+                                           "metadata": "view",
+                                           "admin": "not-admin"}
+
+            resource_ob['group_masks'] = {}
+
+            add_resource(resource_ob, update=False)
+        else:
+            continue
+
+
+def insert_geno_resources(default_owner_id):
+    current_resources = Redis.hgetall("resources")
+    Cursor.execute("""  SELECT
+                            GenoFreeze.Id, GenoFreeze.ShortName, GenoFreeze.confidentiality
+                        FROM
+                            GenoFreeze""")
+
+    resource_results = Cursor.fetchall()
+    for i, resource in enumerate(resource_results):
+        resource_ob = {}
+        resource_ob['name'] = resource[1]
+        if resource[1] == "HET3-ITPGeno":
+            resource_ob['owner_id'] = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae"
+        else:
+            resource_ob['owner_id'] = default_owner_id
+        resource_ob['data'] = {"dataset": str(resource[0])}
+        resource_ob['type'] = "dataset-geno"
+        if resource[2] < 1:
+            resource_ob['default_mask'] = {"data": "view",
+                                           "metadata": "view",
+                                           "admin": "not-admin"}
+        else:
+            resource_ob['default_mask'] = {"data": "no-access",
+                                           "metadata": "no-access",
+                                           "admin": "not-admin"}
+        resource_ob['group_masks'] = {}
+
+        add_resource(resource_ob, update=False)
+
+
+def insert_resources(default_owner_id):
+    current_resources = get_resources()
+    print("START")
+    insert_publish_resources(default_owner_id)
+    print("AFTER PUBLISH")
+    insert_geno_resources(default_owner_id)
+    print("AFTER GENO")
+    insert_probeset_resources(default_owner_id)
+    print("AFTER PROBESET")
+
+
+def main():
+    """Deletes the Redis "resources" key and re-inserts the default resources under the default owner id"""
+
+    Redis.delete("resources")
+
+    owner_id = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae"
+
+    insert_resources(owner_id)
+
+
+if __name__ == '__main__':
+    Conn = MySQLdb.Connect(**parse_db_uri())
+    Cursor = Conn.cursor()
+    main()
author	Arthur Centeno	2021-10-25 21:04:23 +0000
committer	Arthur Centeno	2021-10-25 21:04:23 +0000
commit	499a80f138030c4de1629c043c8f9401a99894ea (patch)
tree	449dcae965d13f966fb6d52625fbc86661c8c6a0 /wqflask/maintenance
parent	6151faa9ea67af4bf4ea95fb681a9dc4319474b6 (diff)
parent	700802303e5e8221a9d591ba985d6607aa61e1ce (diff)
download	genenetwork2-499a80f138030c4de1629c043c8f9401a99894ea.tar.gz