about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- scripts/convert_dol_genotypes.py 14
1 files changed, 11 insertions, 3 deletions
diff --git a/scripts/convert_dol_genotypes.py b/scripts/convert_dol_genotypes.py
index 81b3bd6d..5cda2e9c 100644
--- a/scripts/convert_dol_genotypes.py
+++ b/scripts/convert_dol_genotypes.py
@@ -1,6 +1,8 @@
# This is just to convert the Rqtl2 format genotype files for DOL into a .geno file
# Everything is hard-coded since I doubt this will be re-used and I just wanted to generate the file quickly
+# This is to be used on the files generated as described by Karl Broman here - https://kbroman.org/qtl2/pages/prep_do_data.html
+
import os
geno_dir = "/home/zas1024/gn2-zach/DO_genotypes/"
@@ -30,9 +32,13 @@ for filename in os.listdir(geno_dir):
if i < 3:
continue
elif not len(sample_names) and i == 3:
- sample_names = [item.replace("TLB", "TB") for item in line_items[1:]]
+ sample_names_positions = [[item.replace("TLB", "TB").strip(), i] for i, item in enumerate(line_items[1:])]
+ sample_names_positions.sort(key = lambda x: x[0][2:])
+ sample_names = [sample[0] for sample in sample_names_positions]
elif i > 3:
- marker_data[line_items[0]]['genotypes'] = ["X" if item.strip() == "-" else item.strip() for item in line_items[1:]]
+ genotypes = ["X" if item.strip() == "-" else item.strip() for item in line_items[1:]]
+ ordered_genotypes = [genotypes[i].strip() for i in [pos[1] for pos in sample_names_positions]]
+ marker_data[line_items[0]]['genotypes'] = ordered_genotypes
# Generate list of marker obs to iterate through when writing to .geno file
marker_list = []
@@ -46,6 +52,7 @@ for key, value in marker_data.items():
}
marker_list.append(this_marker)
+
def sort_func(e):
"""For ensuring that X/Y chromosomes/mitochondria are sorted to the end correctly"""
try:
@@ -63,7 +70,7 @@ marker_list.sort(key=sort_func)
# Write lines to .geno file
with open(gn_geno_path, "w") as gn_geno_fh:
- gn_geno_fh.write("\t".join((["Chr", "Locus", "cM", "Mb"] + sample_names)))
+ gn_geno_fh.write("\t".join((["Chr", "Locus", "cM", "Mb"] + sample_names)) + "\n")
for marker in marker_list:
row_contents = [
marker['chr'],
@@ -72,3 +79,4 @@ with open(gn_geno_path, "w") as gn_geno_fh:
marker['pos']
] + marker['genotypes']
gn_geno_fh.write("\t".join(row_contents) + "\n")
+