1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
"""
This module contains functions to interact with the `qtlreaper` utility for
computation of QTLs.
"""
import os
import subprocess
from gn3.random import random_string
from gn3.settings import TMPDIR, REAPER_COMMAND
def generate_traits_file(strains, trait_values, traits_filename):
    """
    Generate a traits file for use with `qtlreaper`.

    The file is tab-separated: a header row (`Trait` followed by the strain
    names), then one row per trait, labelled `T1`, `T2`, ... in the order the
    traits appear in `trait_values`. The header row and every trait row except
    the last are newline-terminated; the last trait row is not.

    PARAMETERS:
    strains: A list of strains to use as the headers for the various columns.
    trait_values: A list of lists of values for each trait and strain. Each
        value is converted with `str` before being written.
    traits_filename: The path of the tab-separated values file to write the
        values to, for computation of QTLs.
    """
    header = "Trait\t{}\n".format("\t".join(strains))
    # Number the traits from 1 so the labels read T1, T2, ...
    rows = [
        "T{}\t{}".format(index, "\t".join(str(value) for value in values))
        for index, values in enumerate(trait_values, start=1)]
    with open(traits_filename, "w") as outfile:
        outfile.write(header)
        # Joining (rather than terminating) keeps the final row free of a
        # trailing newline, matching the established file layout.
        outfile.write("\n".join(rows))
def create_output_directory(path: str):
    """
    Create the output directory at `path` if it does not exist.

    Missing parent directories are created as well. An already existing
    directory is left untouched.

    PARAMETERS:
    path: The path of the directory to create.

    RAISES:
    `OSError` (or a subclass) if the directory cannot be created, e.g. because
    of insufficient permissions or because `path` exists and is not a
    directory.
    """
    # `exist_ok=True` only tolerates "directory already exists"; real failures
    # are surfaced here instead of being silently ignored.
    os.makedirs(path, exist_ok=True)
def run_reaper(
        genotype_filename: str, traits_filename: str,
        other_options: tuple = ("--n_permutations", "1000"),
        separate_nperm_output: bool = False,
        output_dir: str = TMPDIR):
    """
    Invoke the `qtlreaper` command to compute QTLs.

    Output files are placed in a `qtlreaper` sub-directory of `output_dir`
    (created on demand) and are given randomised names.

    PARAMETERS:
    genotype_filename: The complete path to the genotype file to use in the
        QTL computation.
    traits_filename: A path to a traits file, as produced by the
        `generate_traits_file` function in this module.
    other_options: Extra command-line options passed to `qtlreaper`, placed
        between the genotype and traits arguments.
    separate_nperm_output: Whether to request a separate output file for the
        permutations computation. Defaults to `False`, in which case no
        separate file is requested.
    output_dir: The directory under which the outputs are put.

    RETURNS:
    A tuple `(main_output_file, permutations_output_file)`. The second item is
    `None` when `separate_nperm_output` is `False`.

    RAISES:
    `subprocess.CalledProcessError` if the `qtlreaper` command exits with a
    non-zero status.
    """
    create_output_directory("{}/qtlreaper".format(output_dir))

    main_output = "{}/qtlreaper/main_output_{}.txt".format(
        output_dir, random_string(10))
    output_args = ["--main_output", main_output]

    permu_output = None
    if separate_nperm_output:
        permu_output = "{}/qtlreaper/permu_output_{}.txt".format(
            output_dir, random_string(10))
        output_args.extend(["--permu_output", permu_output])

    # Assemble the argument vector piece by piece, in the order:
    # command, genotype, caller options, traits, outputs.
    command = [REAPER_COMMAND, "--geno", genotype_filename]
    command.extend(other_options)
    command.extend(["--traits", traits_filename])
    command.extend(output_args)

    subprocess.run(command, check=True)
    return (main_output, permu_output)
def parse_reaper_main_results(results_file):
    """
    Parse the results file of running QTLReaper into a list of dicts.

    The first line of the file is read as a tab-separated header. Every
    following non-blank line becomes one dict keyed by the header names: the
    first two columns are kept as strings, the remaining columns are converted
    to `float` where possible and kept as strings otherwise.

    PARAMETERS:
    results_file: The path to the main output file to parse.

    RETURNS:
    A list of dicts, one per data row, in file order. An empty list is
    returned if the file has no content at all.
    """
    def _parse_column_value(value):
        # Non-numeric columns are passed through unchanged.
        try:
            return float(value)
        except ValueError:
            return value

    def _parse_line(line):
        items = line.strip().split("\t")
        return items[0:2] + [_parse_column_value(item) for item in items[2:]]

    with open(results_file, "r") as infile:
        lines = infile.readlines()

    if not lines:
        return []

    header = lines[0].strip().split("\t")
    # Blank lines (e.g. a trailing empty line) carry no data; skip them rather
    # than emit a bogus row.
    return [
        dict(zip(header, _parse_line(line)))
        for line in lines[1:] if line.strip()]
def parse_reaper_permutation_results(results_file):
    """
    Parse the results of QTLReaper permutations into a list of values.

    Each non-blank line of the file is converted to a `float`; blank lines
    are ignored.

    PARAMETERS:
    results_file: The path to the permutations output file to parse.

    RETURNS:
    A list of floats, one per non-blank line, in file order.

    RAISES:
    `ValueError` if a non-blank line cannot be converted to a `float`.
    """
    with open(results_file, "r") as infile:
        stripped_lines = (line.strip() for line in infile)
        return [float(line) for line in stripped_lines if line]
|