aboutsummaryrefslogtreecommitdiff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r--wqflask/maintenance/convert_geno_to_bimbam.py21
-rw-r--r--wqflask/maintenance/gen_select_dataset.py6
-rw-r--r--wqflask/maintenance/generate_kinship_from_bimbam.py13
-rw-r--r--wqflask/maintenance/geno_to_json.py3
-rw-r--r--wqflask/maintenance/quantile_normalize.py3
5 files changed, 31 insertions, 15 deletions
diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py
index 5b2369c9..a1712500 100644
--- a/wqflask/maintenance/convert_geno_to_bimbam.py
+++ b/wqflask/maintenance/convert_geno_to_bimbam.py
@@ -83,7 +83,8 @@ class ConvertGenoFile:
genotypes = row_items[2:]
for item_count, genotype in enumerate(genotypes):
if genotype.upper().strip() in self.configurations:
- this_marker.genotypes.append(self.configurations[genotype.upper().strip()])
+ this_marker.genotypes.append(
+ self.configurations[genotype.upper().strip()])
else:
this_marker.genotypes.append("NA")
@@ -106,9 +107,11 @@ class ConvertGenoFile:
with open(self.output_files[2], "w") as snp_fh:
for marker in self.markers:
if self.mb_exists:
- snp_fh.write(marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n")
+ snp_fh.write(
+ marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n")
else:
- snp_fh.write(marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n")
+ snp_fh.write(
+ marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n")
def get_sample_list(self, row_contents):
self.sample_list = []
@@ -160,10 +163,14 @@ class ConvertGenoFile:
group_name = ".".join(input_file.split('.')[:-1])
if group_name == "HSNIH-Palmer":
continue
- geno_output_file = os.path.join(new_directory, group_name + "_geno.txt")
- pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt")
- snp_output_file = os.path.join(new_directory, group_name + "_snps.txt")
- output_files = [geno_output_file, pheno_output_file, snp_output_file]
+ geno_output_file = os.path.join(
+ new_directory, group_name + "_geno.txt")
+ pheno_output_file = os.path.join(
+ new_directory, group_name + "_pheno.txt")
+ snp_output_file = os.path.join(
+ new_directory, group_name + "_snps.txt")
+ output_files = [geno_output_file,
+ pheno_output_file, snp_output_file]
print("%s -> %s" % (
os.path.join(old_directory, input_file), geno_output_file))
convertob = ConvertGenoFile(input_file, output_files)
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index 583a06e1..484336a6 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -120,14 +120,16 @@ def get_types(groups):
else:
if not phenotypes_exist(group_name) and not genotypes_exist(group_name):
types[species].pop(group_name, None)
- groups[species] = tuple(group for group in groups[species] if group[0] != group_name)
+ groups[species] = tuple(
+ group for group in groups[species] if group[0] != group_name)
else: # ZS: This whole else statement might be unnecessary, need to check
types_list = build_types(species, group_name)
if len(types_list) > 0:
types[species][group_name] = types_list
else:
types[species].pop(group_name, None)
- groups[species] = tuple(group for group in groups[species] if group[0] != group_name)
+ groups[species] = tuple(
+ group for group in groups[species] if group[0] != group_name)
return types
diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py
index 7cc60c9e..bed634fa 100644
--- a/wqflask/maintenance/generate_kinship_from_bimbam.py
+++ b/wqflask/maintenance/generate_kinship_from_bimbam.py
@@ -21,7 +21,9 @@ class GenerateKinshipMatrices:
self.pheno_file = pheno_file
def generate_kinship(self):
- gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + " -p " + self.pheno_file + " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name
+ gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + \
+ " -p " + self.pheno_file + \
+ " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name
print("command:", gemma_command)
os.system(gemma_command)
@@ -34,9 +36,12 @@ class GenerateKinshipMatrices:
group_name = ".".join(input_file.split('.')[:-1])
if group_name == "HSNIH-Palmer":
continue
- geno_input_file = os.path.join(bimbam_dir, group_name + "_geno.txt")
- pheno_input_file = os.path.join(bimbam_dir, group_name + "_pheno.txt")
- convertob = GenerateKinshipMatrices(group_name, geno_input_file, pheno_input_file)
+ geno_input_file = os.path.join(
+ bimbam_dir, group_name + "_geno.txt")
+ pheno_input_file = os.path.join(
+ bimbam_dir, group_name + "_pheno.txt")
+ convertob = GenerateKinshipMatrices(
+ group_name, geno_input_file, pheno_input_file)
try:
convertob.generate_kinship()
except EmptyConfigurations as why:
diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py
index ad3f2b72..7bdf2b53 100644
--- a/wqflask/maintenance/geno_to_json.py
+++ b/wqflask/maintenance/geno_to_json.py
@@ -100,7 +100,8 @@ class ConvertGenoFile:
genotypes = row_items[2:]
for item_count, genotype in enumerate(genotypes):
if genotype.upper() in self.configurations:
- this_marker.genotypes.append(self.configurations[genotype.upper()])
+ this_marker.genotypes.append(
+ self.configurations[genotype.upper()])
else:
this_marker.genotypes.append("NA")
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
index 1896bc52..ac7689f5 100644
--- a/wqflask/maintenance/quantile_normalize.py
+++ b/wqflask/maintenance/quantile_normalize.py
@@ -34,7 +34,8 @@ def create_dataframe(input_file):
with open(input_file) as f:
ncols = len(f.readline().split("\t"))
- input_array = np.loadtxt(open(input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols)))
+ input_array = np.loadtxt(open(
+ input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols)))
return pd.DataFrame(input_array)
# This function taken from https://github.com/ShawnLYU/Quantile_Normalize