diff options
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- | wqflask/maintenance/convert_dryad_to_bimbam.py | 6 | ||||
-rw-r--r-- | wqflask/maintenance/convert_geno_to_bimbam.py | 10 | ||||
-rw-r--r-- | wqflask/maintenance/gen_select_dataset.py | 16 | ||||
-rw-r--r-- | wqflask/maintenance/generate_kinship_from_bimbam.py | 4 | ||||
-rw-r--r-- | wqflask/maintenance/geno_to_json.py | 26 | ||||
-rw-r--r-- | wqflask/maintenance/quantile_normalize.py | 30 | ||||
-rw-r--r-- | wqflask/maintenance/set_resource_defaults.py | 26 |
7 files changed, 59 insertions, 59 deletions
diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py index 12ce35e9..8eab66e8 100644 --- a/wqflask/maintenance/convert_dryad_to_bimbam.py +++ b/wqflask/maintenance/convert_dryad_to_bimbam.py @@ -41,7 +41,7 @@ def read_dryad_file(filename): return geno_rows - #for i, marker in enumerate(marker_list): + # for i, marker in enumerate(marker_list): # this_row = [] # this_row.append(marker) # this_row.append("X") @@ -53,7 +53,7 @@ def read_dryad_file(filename): # print("row: " + str(i)) # geno_rows.append(this_row) # - #return geno_rows + # return geno_rows def write_bimbam_files(geno_rows): with open('/home/zas1024/cfw_data/CFW_geno.txt', 'w') as geno_fh: @@ -64,6 +64,6 @@ def convert_dryad_to_bimbam(filename): geno_file_rows = read_dryad_file(filename) write_bimbam_files(geno_file_rows) -if __name__=="__main__": +if __name__ == "__main__": input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt" convert_dryad_to_bimbam(input_filename) diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py index 0853b3ac..dc01cbb3 100644 --- a/wqflask/maintenance/convert_geno_to_bimbam.py +++ b/wqflask/maintenance/convert_geno_to_bimbam.py @@ -103,9 +103,9 @@ class ConvertGenoFile: with open(self.output_files[2], "w") as snp_fh: for marker in self.markers: if self.mb_exists: - snp_fh.write(marker['name'] +", " + str(int(float(marker['Mb'])*1000000)) + ", " + marker['chr'] + "\n") + snp_fh.write(marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n") else: - snp_fh.write(marker['name'] +", " + str(int(float(marker['cM'])*1000000)) + ", " + marker['chr'] + "\n") + snp_fh.write(marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n") def get_sample_list(self, row_contents): self.sample_list = [] @@ -178,12 +178,12 @@ class ConvertGenoFile: print(" Row is:", convertob.latest_row_value) break -if __name__=="__main__": +if __name__ == "__main__": Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam""" #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") - #convertob.convert() + # convertob.convert() ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) - #ConvertGenoFiles(Geno_Directory) + # ConvertGenoFiles(Geno_Directory) diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index 544e2fd1..f480d63f 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -61,10 +61,10 @@ def parse_db_uri(): parsed_uri = urllib.parse.urlparse(SQL_URI) db_conn_info = dict( - db = parsed_uri.path[1:], - host = parsed_uri.hostname, - user = parsed_uri.username, - passwd = parsed_uri.password) + db=parsed_uri.path[1:], + host=parsed_uri.hostname, + user=parsed_uri.username, + passwd=parsed_uri.password) print(db_conn_info) return db_conn_info @@ -120,7 +120,7 @@ def get_types(groups): if not phenotypes_exist(group_name) and not genotypes_exist(group_name): types[species].pop(group_name, None) groups[species] = tuple(group for group in groups[species] if group[0] != group_name) - else: #ZS: This whole else statement might be unnecessary, need to check + else: # ZS: This whole else statement might be unnecessary, need to check types_list = build_types(species, group_name) if len(types_list) > 0: types[species][group_name] = types_list @@ -133,7 +133,7 @@ def get_types(groups): def phenotypes_exist(group_name): #print("group_name:", group_name) Cursor.execute("""select Name from PublishFreeze - where PublishFreeze.Name = '%s'""" % (group_name+"Publish")) + where PublishFreeze.Name = '%s'""" % (group_name + "Publish")) results = Cursor.fetchone() #print("RESULTS:", results) @@ -146,7 +146,7 @@ def phenotypes_exist(group_name): def genotypes_exist(group_name): #print("group_name:", group_name) Cursor.execute("""select Name from GenoFreeze - where GenoFreeze.Name = '%s'""" % (group_name+"Geno")) + where GenoFreeze.Name = '%s'""" % (group_name + "Geno")) results = Cursor.fetchone() #print("RESULTS:", results) @@ -246,7 +246,7 @@ def build_datasets(species, group, type_name): dataset_text = "%s Genotypes" % group datasets.append((dataset_id, dataset_value, dataset_text)) - else: # for mRNA expression/ProbeSet + else: # for mRNA expression/ProbeSet Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py index 3e4d1741..664e9e48 100644 --- a/wqflask/maintenance/generate_kinship_from_bimbam.py +++ b/wqflask/maintenance/generate_kinship_from_bimbam.py @@ -52,9 +52,9 @@ class GenerateKinshipMatrices: break -if __name__=="__main__": +if __name__ == "__main__": Geno_Directory = """/export/local/home/zas1024/genotype_files/genotype/""" Bimbam_Directory = """/export/local/home/zas1024/genotype_files/genotype/bimbam/""" GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory) - #./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD + # ./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py index f5f7e0e7..fa0dcebd 100644 --- a/wqflask/maintenance/geno_to_json.py +++ b/wqflask/maintenance/geno_to_json.py @@ -66,16 +66,16 @@ class ConvertGenoFile: self.configurations = {} #self.skipped_cols = 3 - #if self.input_file.endswith(".geno.gz"): + # if self.input_file.endswith(".geno.gz"): # print("self.input_file: ", self.input_file) # self.input_fh = gzip.open(self.input_file) - #else: + # else: self.input_fh = open(self.input_file) with open(self.output_file, "w") as self.output_fh: - #if self.file_type == "geno": + # if self.file_type == "geno": self.process_csv() - #elif self.file_type == "snps": + # elif self.file_type == "snps": # self.process_snps_file() @@ -105,7 +105,7 @@ class ConvertGenoFile: this_marker.genotypes.append("NA") #print("this_marker is:", pf(this_marker.__dict__)) - #if this_marker.chr == "14": + # if this_marker.chr == "14": self.markers.append(this_marker.__dict__) with open(self.output_file, 'w') as fh: @@ -115,16 +115,16 @@ class ConvertGenoFile: #self.latest_col_pos = item_count + self.skipped_cols #self.latest_col_value = item - #if item_count != 0: + # if item_count != 0: # self.output_fh.write(" ") - #self.output_fh.write(self.configurations[item.upper()]) + # self.output_fh.write(self.configurations[item.upper()]) - #self.output_fh.write("\n") + # self.output_fh.write("\n") def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): - #if self.input_file.endswith(".geno.gz"): + # if self.input_file.endswith(".geno.gz"): # print("row: ", row) self.latest_row_value = row # Take care of headers @@ -176,21 +176,21 @@ class ConvertGenoFile: print(" Row is:", convertob.latest_row_value) break - #def process_snps_file(cls, snps_file, new_directory): + # def process_snps_file(cls, snps_file, new_directory): # output_file = os.path.join(new_directory, "mouse_families.json") # print("%s -> %s" % (snps_file, output_file)) # convertob = ConvertGenoFile(input_file, output_file) -if __name__=="__main__": +if __name__ == "__main__": Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json""" #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") - #convertob.convert() + # convertob.convert() ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) - #ConvertGenoFiles(Geno_Directory) + # ConvertGenoFiles(Geno_Directory) #process_csv(Input_File, Output_File) diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py index 701b2b50..6751a8e5 100644 --- a/wqflask/maintenance/quantile_normalize.py +++ b/wqflask/maintenance/quantile_normalize.py @@ -20,10 +20,10 @@ def parse_db_uri(): parsed_uri = urllib.parse.urlparse(SQL_URI) db_conn_info = dict( - db = parsed_uri.path[1:], - host = parsed_uri.hostname, - user = parsed_uri.username, - passwd = parsed_uri.password) + db=parsed_uri.path[1:], + host=parsed_uri.hostname, + user=parsed_uri.username, + passwd=parsed_uri.password) print(db_conn_info) return db_conn_info @@ -35,16 +35,16 @@ def create_dataframe(input_file): input_array = np.loadtxt(open(input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols))) return pd.DataFrame(input_array) -#This function taken from https://github.com/ShawnLYU/Quantile_Normalize +# This function taken from https://github.com/ShawnLYU/Quantile_Normalize def quantileNormalize(df_input): df = df_input.copy() - #compute rank + # compute rank dic = {} for col in df: - dic.update({col : sorted(df[col])}) + dic.update({col: sorted(df[col])}) sorted_df = pd.DataFrame(dic) - rank = sorted_df.mean(axis = 1).tolist() - #sort + rank = sorted_df.mean(axis=1).tolist() + # sort for col in df: t = np.searchsorted(np.sort(df[col]), df[col]) df[col] = [rank[i] for i in t] @@ -65,8 +65,8 @@ def set_data(dataset_name): for i, sample in enumerate(sample_names): this_sample = { "name": sample, - "value": line1.split('\t')[i+1], - "qnorm": line2.split('\t')[i+1] + "value": line1.split('\t')[i + 1], + "qnorm": line2.split('\t')[i + 1] } sample_list.append(this_sample) query = """SELECT Species.SpeciesName, InbredSet.InbredSetName, ProbeSetFreeze.FullName @@ -99,9 +99,9 @@ if __name__ == '__main__': Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() - #es = Elasticsearch([{ + # es = Elasticsearch([{ # "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT - #}], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None + # }], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None es = get_elasticsearch_connection(for_user=False) @@ -116,8 +116,8 @@ if __name__ == '__main__': success, _ = bulk(es, set_data(sys.argv[1])) response = es.search( - index = "traits", doc_type = "trait", body = { - "query": { "match": { "name": "ENSMUSG00000028982" } } + index="traits", doc_type="trait", body = { + "query": {"match": {"name": "ENSMUSG00000028982"}} } ) diff --git a/wqflask/maintenance/set_resource_defaults.py b/wqflask/maintenance/set_resource_defaults.py index 4177c124..286094dd 100644 --- a/wqflask/maintenance/set_resource_defaults.py +++ b/wqflask/maintenance/set_resource_defaults.py @@ -43,10 +43,10 @@ def parse_db_uri(): parsed_uri = urllib.parse.urlparse(SQL_URI) db_conn_info = dict( - db = parsed_uri.path[1:], - host = parsed_uri.hostname, - user = parsed_uri.username, - passwd = parsed_uri.password) + db=parsed_uri.path[1:], + host=parsed_uri.hostname, + user=parsed_uri.username, + passwd=parsed_uri.password) print(db_conn_info) return db_conn_info @@ -63,14 +63,14 @@ def insert_probeset_resources(default_owner_id): resource_ob = {} resource_ob['name'] = resource[1] resource_ob['owner_id'] = default_owner_id - resource_ob['data'] = { "dataset" : str(resource[0])} + resource_ob['data'] = {"dataset": str(resource[0])} resource_ob['type'] = "dataset-probeset" if resource[2] < 1 and resource[3] > 0: - resource_ob['default_mask'] = { "data": "view", + resource_ob['default_mask'] = {"data": "view", "metadata": "view", "admin": "not-admin"} else: - resource_ob['default_mask'] = { "data": "no-access", + resource_ob['default_mask'] = {"data": "no-access", "metadata": "no-access", "admin": "not-admin"} resource_ob['group_masks'] = {} @@ -97,10 +97,10 @@ def insert_publish_resources(default_owner_id): else: resource_ob['name'] = str(resource[0]) resource_ob['owner_id'] = default_owner_id - resource_ob['data'] = { "dataset" : str(resource[1]) , - "trait" : str(resource[0])} + resource_ob['data'] = {"dataset": str(resource[1]), + "trait": str(resource[0])} resource_ob['type'] = "dataset-publish" - resource_ob['default_mask'] = { "data": "view", + resource_ob['default_mask'] = {"data": "view", "metadata": "view", "admin": "not-admin"} @@ -125,14 +125,14 @@ def insert_geno_resources(default_owner_id): resource_ob['owner_id'] = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae" else: resource_ob['owner_id'] = default_owner_id - resource_ob['data'] = { "dataset" : str(resource[0]) } + resource_ob['data'] = {"dataset": str(resource[0])} resource_ob['type'] = "dataset-geno" if resource[2] < 1: - resource_ob['default_mask'] = { "data": "view", + resource_ob['default_mask'] = {"data": "view", "metadata": "view", "admin": "not-admin"} else: - resource_ob['default_mask'] = { "data": "no-access", + resource_ob['default_mask'] = {"data": "no-access", "metadata": "no-access", "admin": "not-admin"} resource_ob['group_masks'] = {} |