about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- scripts/convert_dol_genotypes.py 14
1 files changed, 11 insertions, 3 deletions
diff --git a/scripts/convert_dol_genotypes.py b/scripts/convert_dol_genotypes.py
index 81b3bd6d..5cda2e9c 100644
--- a/scripts/convert_dol_genotypes.py
+++ b/scripts/convert_dol_genotypes.py
@@ -1,6 +1,8 @@
 # This is just to convert the Rqtl2 format genotype files for DOL into a .geno file
 # Everything is hard-coded since I doubt this will be re-used and I just wanted to generate the file quickly
 
+# This is to be used on the files generated as described by Karl Broman here - https://kbroman.org/qtl2/pages/prep_do_data.html
+
 import os
 
 geno_dir = "/home/zas1024/gn2-zach/DO_genotypes/"
@@ -30,9 +32,13 @@ for filename in os.listdir(geno_dir):
                 if i < 3:
                     continue
                 elif not len(sample_names) and i == 3:
-                    sample_names = [item.replace("TLB", "TB") for item in line_items[1:]]
+                    sample_names_positions = [[item.replace("TLB", "TB").strip(), i] for i, item in enumerate(line_items[1:])]
+                    sample_names_positions.sort(key = lambda x: x[0][2:])
+                    sample_names = [sample[0] for sample in sample_names_positions]
                 elif i > 3:
-                    marker_data[line_items[0]]['genotypes'] = ["X" if item.strip() == "-" else item.strip() for item in line_items[1:]]
+                    genotypes = ["X" if item.strip() == "-" else item.strip() for item in line_items[1:]]
+                    ordered_genotypes = [genotypes[i].strip() for i in [pos[1] for pos in sample_names_positions]]
+                    marker_data[line_items[0]]['genotypes'] = ordered_genotypes
 
 # Generate list of marker obs to iterate through when writing to .geno file
 marker_list = []
@@ -46,6 +52,7 @@ for key, value in marker_data.items():
         }
         marker_list.append(this_marker)
 
+
 def sort_func(e):
     """For ensuring that X/Y chromosomes/mitochondria are sorted to the end correctly"""
     try:
@@ -63,7 +70,7 @@ marker_list.sort(key=sort_func)
 
 # Write lines to .geno file
 with open(gn_geno_path, "w") as gn_geno_fh:
-    gn_geno_fh.write("\t".join((["Chr", "Locus", "cM", "Mb"] + sample_names)))
+    gn_geno_fh.write("\t".join((["Chr", "Locus", "cM", "Mb"] + sample_names)) + "\n")
     for marker in marker_list:
         row_contents = [
             marker['chr'],
@@ -72,3 +79,4 @@ with open(gn_geno_path, "w") as gn_geno_fh:
             marker['pos']
         ] + marker['genotypes']
         gn_geno_fh.write("\t".join(row_contents) + "\n")
+