From 69676ecd49a779350ef7bd3faa348c7f27602a20 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 17 Dec 2021 20:10:33 +0000 Subject: Some updates to the DOL genotypes conversion script --- scripts/convert_dol_genotypes.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/convert_dol_genotypes.py b/scripts/convert_dol_genotypes.py index 81b3bd6d..5cda2e9c 100644 --- a/scripts/convert_dol_genotypes.py +++ b/scripts/convert_dol_genotypes.py @@ -1,6 +1,8 @@ # This is just to convert the Rqtl2 format genotype files for DOL into a .geno file # Everything is hard-coded since I doubt this will be re-used and I just wanted to generate the file quickly +# This is to be used on the files generated as described by Karl Broman here - https://kbroman.org/qtl2/pages/prep_do_data.html + import os geno_dir = "/home/zas1024/gn2-zach/DO_genotypes/" @@ -30,9 +32,13 @@ for filename in os.listdir(geno_dir): if i < 3: continue elif not len(sample_names) and i == 3: - sample_names = [item.replace("TLB", "TB") for item in line_items[1:]] + sample_names_positions = [[item.replace("TLB", "TB").strip(), i] for i, item in enumerate(line_items[1:])] + sample_names_positions.sort(key = lambda x: x[0][2:]) + sample_names = [sample[0] for sample in sample_names_positions] elif i > 3: - marker_data[line_items[0]]['genotypes'] = ["X" if item.strip() == "-" else item.strip() for item in line_items[1:]] + genotypes = ["X" if item.strip() == "-" else item.strip() for item in line_items[1:]] + ordered_genotypes = [genotypes[i].strip() for i in [pos[1] for pos in sample_names_positions]] + marker_data[line_items[0]]['genotypes'] = ordered_genotypes # Generate list of marker obs to iterate through when writing to .geno file marker_list = [] @@ -46,6 +52,7 @@ for key, value in marker_data.items(): } marker_list.append(this_marker) + def sort_func(e): """For ensuring that X/Y chromosomes/mitochondria are sorted to the end correctly""" try: @@ -63,7 +70,7 @@ marker_list.sort(key=sort_func) # Write lines to .geno file with open(gn_geno_path, "w") as gn_geno_fh: - gn_geno_fh.write("\t".join((["Chr", "Locus", "cM", "Mb"] + sample_names))) + gn_geno_fh.write("\t".join((["Chr", "Locus", "cM", "Mb"] + sample_names)) + "\n") for marker in marker_list: row_contents = [ marker['chr'], @@ -72,3 +79,4 @@ with open(gn_geno_path, "w") as gn_geno_fh: marker['pos'] ] + marker['genotypes'] gn_geno_fh.write("\t".join(row_contents) + "\n") + -- cgit v1.2.3