1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
import csv
import os
# Directory that is searched for the per-chromosome genotype files, and the
# directory that receives the combined .geno file.
input_dir = "/export2/local/home/zas1024/gn2-zach/hxbbxh-genotypes/hao/error0.001"
output_dir = "/export2/local/home/zas1024/gn2-zach/hxbbxh-genotypes/hao/output"

# Translation from the genotype codes found in the input files to the letters
# written to the .geno file (B = @mat, D = @pat, H = @het in the header below).
base_dict = {"0": "B", "2": "D", "1": "H"}

# Chromosome labels substituted into the input file name: "1".."20", "X", "Y".
chromosomes = [str(number) for number in range(1, 21)] + ["X", "Y"]

# Sample names in the order they must appear in the output columns.
sample_list = [
    "BXH" + suffix
    for suffix in ("2", "3", "5", "6", "8", "9", "10", "11", "12", "12a", "13")
] + [
    "HXB" + suffix
    for suffix in (
        "1", "2", "3", "4", "5", "7", "9", "10", "13", "14", "15", "16", "17",
        "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "29", "30",
        "31",
    )
]

# Output rows; starts with the .geno header lines (one single-cell row each).
# The column-title row and the marker rows are appended later in the script.
row_list = [
    [header_line]
    for header_line in (
        "#type riset or intercross",
        "@type:riset",
        "@name:HXB/BXH",
        "#abbreviation of maternal or paternal parents",
        "@mat:B",
        "@pat:D",
        '#heterozygous , optional, default is "H"',
        "@het:H",
        '#Unknown , optional, default is "U"',
        "@unk:U",
    )
]
# Sample names as they appear in the first genotype file that is found.
file_sample_list = []
# Entries of sample_list that are present in that file, kept in sample_list order.
trimmed_samples = []
# sample_mapping[k] is the position of trimmed_samples[k] inside
# file_sample_list; it re-orders every genotype row into the sample_list
# order (the order used in GN) regardless of the order used in the files.
sample_mapping = []

for chromosome in chromosomes:
    f = os.path.join(input_dir, f"HXB_genotype_chr{chromosome}_dup_removed_smoothed_by_rqtl_error0.001_dup_removed_again_012.csv")
    # Chromosomes that have no input file are silently skipped.
    if not os.path.isfile(f):
        continue

    # The whole file is read up front, so the handle can be released before
    # any further processing.
    with open(f, "r") as the_file:
        all_rows = [row.split() for row in the_file]

    # Transpose the whitespace-split table (file columns become rows) and
    # strip the double quotes from every cell.  zip() stops at the shortest
    # line, so trailing cells of longer lines are dropped.
    all_rows = [[item.replace('"', '') for item in col] for col in zip(*all_rows)]

    if not file_sample_list:
        # First file only: read the sample names from the first transposed
        # row, dropping the "_mRatNor1" tag and anything after the first "_".
        # NOTE(review): the [4:-1] slice skips four leading cells and one
        # trailing cell - confirm what the trailing cell holds.
        file_sample_list = [sample.replace("_mRatNor1", "").split("_")[0] for sample in all_rows[0][4:-1]]
        for sample in sample_list:
            if sample in file_sample_list:
                trimmed_samples.append(sample)
                sample_mapping.append(file_sample_list.index(sample))
        # Column-title row, written once.  The mapping computed here is
        # reused for every later file, which assumes all files list the
        # samples in the same order - TODO confirm.
        row_list.append(["Chr", "Locus", "cM", "Mb"] + trimmed_samples)

    # Every remaining transposed row describes one marker.
    for row in all_rows[1:]:
        # The locus cell is "<prefix>:<position>"; the position is divided
        # by 1,000,000 to fill the Mb column (presumably base pairs -> Mb).
        this_mb = str(float(row[0].split(":")[1]) / 1000000)
        this_row = [row[1], row[0], row[2], this_mb]
        genotypes = row[4:-1]
        # Codes without an entry in base_dict (e.g. a missing call) are
        # written as "U", the unknown symbol declared by the "@unk:U" header
        # line, instead of aborting the whole run with a KeyError.
        this_row.extend(base_dict.get(genotypes[index], "U") for index in sample_mapping)
        row_list.append(this_row)
# Write every collected row to the .geno file as one tab-separated line.
geno_path = os.path.join(output_dir, "HXBBXH_new.geno")
with open(geno_path, "w") as out_file:
    out_file.writelines("\t".join(cells) + "\n" for cells in row_list)
|