aboutsummaryrefslogtreecommitdiff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r--wqflask/maintenance/convert_dryad_to_bimbam.py6
-rw-r--r--wqflask/maintenance/convert_geno_to_bimbam.py10
-rw-r--r--wqflask/maintenance/gen_select_dataset.py16
-rw-r--r--wqflask/maintenance/generate_kinship_from_bimbam.py4
-rw-r--r--wqflask/maintenance/geno_to_json.py26
-rw-r--r--wqflask/maintenance/quantile_normalize.py30
-rw-r--r--wqflask/maintenance/set_resource_defaults.py26
7 files changed, 59 insertions, 59 deletions
diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py
index 12ce35e9..8eab66e8 100644
--- a/wqflask/maintenance/convert_dryad_to_bimbam.py
+++ b/wqflask/maintenance/convert_dryad_to_bimbam.py
@@ -41,7 +41,7 @@ def read_dryad_file(filename):
return geno_rows
- #for i, marker in enumerate(marker_list):
+ # for i, marker in enumerate(marker_list):
# this_row = []
# this_row.append(marker)
# this_row.append("X")
@@ -53,7 +53,7 @@ def read_dryad_file(filename):
# print("row: " + str(i))
# geno_rows.append(this_row)
#
- #return geno_rows
+ # return geno_rows
def write_bimbam_files(geno_rows):
with open('/home/zas1024/cfw_data/CFW_geno.txt', 'w') as geno_fh:
@@ -64,6 +64,6 @@ def convert_dryad_to_bimbam(filename):
geno_file_rows = read_dryad_file(filename)
write_bimbam_files(geno_file_rows)
-if __name__=="__main__":
+if __name__ == "__main__":
input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt"
convert_dryad_to_bimbam(input_filename)
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py
index 0853b3ac..dc01cbb3 100644
--- a/wqflask/maintenance/convert_geno_to_bimbam.py
+++ b/wqflask/maintenance/convert_geno_to_bimbam.py
@@ -103,9 +103,9 @@ class ConvertGenoFile:
with open(self.output_files[2], "w") as snp_fh:
for marker in self.markers:
if self.mb_exists:
- snp_fh.write(marker['name'] +", " + str(int(float(marker['Mb'])*1000000)) + ", " + marker['chr'] + "\n")
+ snp_fh.write(marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n")
else:
- snp_fh.write(marker['name'] +", " + str(int(float(marker['cM'])*1000000)) + ", " + marker['chr'] + "\n")
+ snp_fh.write(marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n")
def get_sample_list(self, row_contents):
self.sample_list = []
@@ -178,12 +178,12 @@ class ConvertGenoFile:
print(" Row is:", convertob.latest_row_value)
break
-if __name__=="__main__":
+if __name__ == "__main__":
Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam"""
#Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
#Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
#convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
- #convertob.convert()
+ # convertob.convert()
ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
- #ConvertGenoFiles(Geno_Directory)
+ # ConvertGenoFiles(Geno_Directory)
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index 544e2fd1..f480d63f 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -61,10 +61,10 @@ def parse_db_uri():
parsed_uri = urllib.parse.urlparse(SQL_URI)
db_conn_info = dict(
- db = parsed_uri.path[1:],
- host = parsed_uri.hostname,
- user = parsed_uri.username,
- passwd = parsed_uri.password)
+ db=parsed_uri.path[1:],
+ host=parsed_uri.hostname,
+ user=parsed_uri.username,
+ passwd=parsed_uri.password)
print(db_conn_info)
return db_conn_info
@@ -120,7 +120,7 @@ def get_types(groups):
if not phenotypes_exist(group_name) and not genotypes_exist(group_name):
types[species].pop(group_name, None)
groups[species] = tuple(group for group in groups[species] if group[0] != group_name)
- else: #ZS: This whole else statement might be unnecessary, need to check
+ else: # ZS: This whole else statement might be unnecessary, need to check
types_list = build_types(species, group_name)
if len(types_list) > 0:
types[species][group_name] = types_list
@@ -133,7 +133,7 @@ def get_types(groups):
def phenotypes_exist(group_name):
#print("group_name:", group_name)
Cursor.execute("""select Name from PublishFreeze
- where PublishFreeze.Name = '%s'""" % (group_name+"Publish"))
+ where PublishFreeze.Name = '%s'""" % (group_name + "Publish"))
results = Cursor.fetchone()
#print("RESULTS:", results)
@@ -146,7 +146,7 @@ def phenotypes_exist(group_name):
def genotypes_exist(group_name):
#print("group_name:", group_name)
Cursor.execute("""select Name from GenoFreeze
- where GenoFreeze.Name = '%s'""" % (group_name+"Geno"))
+ where GenoFreeze.Name = '%s'""" % (group_name + "Geno"))
results = Cursor.fetchone()
#print("RESULTS:", results)
@@ -246,7 +246,7 @@ def build_datasets(species, group, type_name):
dataset_text = "%s Genotypes" % group
datasets.append((dataset_id, dataset_value, dataset_text))
- else: # for mRNA expression/ProbeSet
+ else: # for mRNA expression/ProbeSet
Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and
diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py
index 3e4d1741..664e9e48 100644
--- a/wqflask/maintenance/generate_kinship_from_bimbam.py
+++ b/wqflask/maintenance/generate_kinship_from_bimbam.py
@@ -52,9 +52,9 @@ class GenerateKinshipMatrices:
break
-if __name__=="__main__":
+if __name__ == "__main__":
Geno_Directory = """/export/local/home/zas1024/genotype_files/genotype/"""
Bimbam_Directory = """/export/local/home/zas1024/genotype_files/genotype/bimbam/"""
GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory)
- #./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD
+ # ./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD
diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py
index f5f7e0e7..fa0dcebd 100644
--- a/wqflask/maintenance/geno_to_json.py
+++ b/wqflask/maintenance/geno_to_json.py
@@ -66,16 +66,16 @@ class ConvertGenoFile:
self.configurations = {}
#self.skipped_cols = 3
- #if self.input_file.endswith(".geno.gz"):
+ # if self.input_file.endswith(".geno.gz"):
# print("self.input_file: ", self.input_file)
# self.input_fh = gzip.open(self.input_file)
- #else:
+ # else:
self.input_fh = open(self.input_file)
with open(self.output_file, "w") as self.output_fh:
- #if self.file_type == "geno":
+ # if self.file_type == "geno":
self.process_csv()
- #elif self.file_type == "snps":
+ # elif self.file_type == "snps":
# self.process_snps_file()
@@ -105,7 +105,7 @@ class ConvertGenoFile:
this_marker.genotypes.append("NA")
#print("this_marker is:", pf(this_marker.__dict__))
- #if this_marker.chr == "14":
+ # if this_marker.chr == "14":
self.markers.append(this_marker.__dict__)
with open(self.output_file, 'w') as fh:
@@ -115,16 +115,16 @@ class ConvertGenoFile:
#self.latest_col_pos = item_count + self.skipped_cols
#self.latest_col_value = item
- #if item_count != 0:
+ # if item_count != 0:
# self.output_fh.write(" ")
- #self.output_fh.write(self.configurations[item.upper()])
+ # self.output_fh.write(self.configurations[item.upper()])
- #self.output_fh.write("\n")
+ # self.output_fh.write("\n")
def process_rows(self):
for self.latest_row_pos, row in enumerate(self.input_fh):
- #if self.input_file.endswith(".geno.gz"):
+ # if self.input_file.endswith(".geno.gz"):
# print("row: ", row)
self.latest_row_value = row
# Take care of headers
@@ -176,21 +176,21 @@ class ConvertGenoFile:
print(" Row is:", convertob.latest_row_value)
break
- #def process_snps_file(cls, snps_file, new_directory):
+ # def process_snps_file(cls, snps_file, new_directory):
# output_file = os.path.join(new_directory, "mouse_families.json")
# print("%s -> %s" % (snps_file, output_file))
# convertob = ConvertGenoFile(input_file, output_file)
-if __name__=="__main__":
+if __name__ == "__main__":
Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype"""
New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json"""
#Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno"""
#Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps"""
#convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json")
- #convertob.convert()
+ # convertob.convert()
ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory)
- #ConvertGenoFiles(Geno_Directory)
+ # ConvertGenoFiles(Geno_Directory)
#process_csv(Input_File, Output_File)
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
index 701b2b50..6751a8e5 100644
--- a/wqflask/maintenance/quantile_normalize.py
+++ b/wqflask/maintenance/quantile_normalize.py
@@ -20,10 +20,10 @@ def parse_db_uri():
parsed_uri = urllib.parse.urlparse(SQL_URI)
db_conn_info = dict(
- db = parsed_uri.path[1:],
- host = parsed_uri.hostname,
- user = parsed_uri.username,
- passwd = parsed_uri.password)
+ db=parsed_uri.path[1:],
+ host=parsed_uri.hostname,
+ user=parsed_uri.username,
+ passwd=parsed_uri.password)
print(db_conn_info)
return db_conn_info
@@ -35,16 +35,16 @@ def create_dataframe(input_file):
input_array = np.loadtxt(open(input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols)))
return pd.DataFrame(input_array)
-#This function taken from https://github.com/ShawnLYU/Quantile_Normalize
+# This function taken from https://github.com/ShawnLYU/Quantile_Normalize
def quantileNormalize(df_input):
df = df_input.copy()
- #compute rank
+ # compute rank
dic = {}
for col in df:
- dic.update({col : sorted(df[col])})
+ dic.update({col: sorted(df[col])})
sorted_df = pd.DataFrame(dic)
- rank = sorted_df.mean(axis = 1).tolist()
- #sort
+ rank = sorted_df.mean(axis=1).tolist()
+ # sort
for col in df:
t = np.searchsorted(np.sort(df[col]), df[col])
df[col] = [rank[i] for i in t]
@@ -65,8 +65,8 @@ def set_data(dataset_name):
for i, sample in enumerate(sample_names):
this_sample = {
"name": sample,
- "value": line1.split('\t')[i+1],
- "qnorm": line2.split('\t')[i+1]
+ "value": line1.split('\t')[i + 1],
+ "qnorm": line2.split('\t')[i + 1]
}
sample_list.append(this_sample)
query = """SELECT Species.SpeciesName, InbredSet.InbredSetName, ProbeSetFreeze.FullName
@@ -99,9 +99,9 @@ if __name__ == '__main__':
Conn = MySQLdb.Connect(**parse_db_uri())
Cursor = Conn.cursor()
- #es = Elasticsearch([{
+ # es = Elasticsearch([{
# "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT
- #}], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None
+ # }], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None
es = get_elasticsearch_connection(for_user=False)
@@ -116,8 +116,8 @@ if __name__ == '__main__':
success, _ = bulk(es, set_data(sys.argv[1]))
response = es.search(
- index = "traits", doc_type = "trait", body = {
- "query": { "match": { "name": "ENSMUSG00000028982" } }
+ index="traits", doc_type="trait", body = {
+ "query": {"match": {"name": "ENSMUSG00000028982"}}
}
)
diff --git a/wqflask/maintenance/set_resource_defaults.py b/wqflask/maintenance/set_resource_defaults.py
index 4177c124..286094dd 100644
--- a/wqflask/maintenance/set_resource_defaults.py
+++ b/wqflask/maintenance/set_resource_defaults.py
@@ -43,10 +43,10 @@ def parse_db_uri():
parsed_uri = urllib.parse.urlparse(SQL_URI)
db_conn_info = dict(
- db = parsed_uri.path[1:],
- host = parsed_uri.hostname,
- user = parsed_uri.username,
- passwd = parsed_uri.password)
+ db=parsed_uri.path[1:],
+ host=parsed_uri.hostname,
+ user=parsed_uri.username,
+ passwd=parsed_uri.password)
print(db_conn_info)
return db_conn_info
@@ -63,14 +63,14 @@ def insert_probeset_resources(default_owner_id):
resource_ob = {}
resource_ob['name'] = resource[1]
resource_ob['owner_id'] = default_owner_id
- resource_ob['data'] = { "dataset" : str(resource[0])}
+ resource_ob['data'] = {"dataset": str(resource[0])}
resource_ob['type'] = "dataset-probeset"
if resource[2] < 1 and resource[3] > 0:
- resource_ob['default_mask'] = { "data": "view",
+ resource_ob['default_mask'] = {"data": "view",
"metadata": "view",
"admin": "not-admin"}
else:
- resource_ob['default_mask'] = { "data": "no-access",
+ resource_ob['default_mask'] = {"data": "no-access",
"metadata": "no-access",
"admin": "not-admin"}
resource_ob['group_masks'] = {}
@@ -97,10 +97,10 @@ def insert_publish_resources(default_owner_id):
else:
resource_ob['name'] = str(resource[0])
resource_ob['owner_id'] = default_owner_id
- resource_ob['data'] = { "dataset" : str(resource[1]) ,
- "trait" : str(resource[0])}
+ resource_ob['data'] = {"dataset": str(resource[1]),
+ "trait": str(resource[0])}
resource_ob['type'] = "dataset-publish"
- resource_ob['default_mask'] = { "data": "view",
+ resource_ob['default_mask'] = {"data": "view",
"metadata": "view",
"admin": "not-admin"}
@@ -125,14 +125,14 @@ def insert_geno_resources(default_owner_id):
resource_ob['owner_id'] = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae"
else:
resource_ob['owner_id'] = default_owner_id
- resource_ob['data'] = { "dataset" : str(resource[0]) }
+ resource_ob['data'] = {"dataset": str(resource[0])}
resource_ob['type'] = "dataset-geno"
if resource[2] < 1:
- resource_ob['default_mask'] = { "data": "view",
+ resource_ob['default_mask'] = {"data": "view",
"metadata": "view",
"admin": "not-admin"}
else:
- resource_ob['default_mask'] = { "data": "no-access",
+ resource_ob['default_mask'] = {"data": "no-access",
"metadata": "no-access",
"admin": "not-admin"}
resource_ob['group_masks'] = {}