aboutsummaryrefslogtreecommitdiff
path: root/gn2/wqflask/api/mapping.py
blob: 1e33096338e87b02ec0133af176ce99d82c2d687 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
from gn2.base import data_set
from gn2.base.trait import create_trait, retrieve_sample_data

from gn2.wqflask.marker_regression import gemma_mapping, rqtl_mapping
from gn2.wqflask.show_trait.show_trait import normf

def do_mapping_for_api(start_vars):
    if ('db' not in start_vars) or ("trait_id" not in start_vars):
        raise ValueError("Mapping: db and trait_id are not in start_vars")

    dataset = data_set.create_dataset(dataset_name=start_vars['db'])
    dataset.group.get_markers()
    this_trait = create_trait(dataset=dataset, name=start_vars['trait_id'])
    this_trait = retrieve_sample_data(this_trait, dataset)

    samples = []
    vals = []

    mapping_params = initialize_parameters(start_vars, dataset, this_trait)

    genofile_samplelist = []
    if mapping_params.get('genofile'):
        dataset.group.genofile = mapping_params['genofile']
        genofile_samplelist = get_genofile_samplelist(dataset)

    if (len(genofile_samplelist) > 0):
        samplelist = genofile_samplelist
        for sample in samplelist:
            in_trait_data = False
            for item in this_trait.data:
                if this_trait.data[item].name == sample:
                    value = str(this_trait.data[item].value)
                    samples.append(item)
                    vals.append(value)
                    in_trait_data = True
                    break
            if not in_trait_data:
                vals.append("x")
    else:
        samplelist = dataset.group.samplelist
        for sample in samplelist:
            in_trait_data = False
            for item in this_trait.data:
                if this_trait.data[item].name == sample:
                    value = str(this_trait.data[item].value)
                    samples.append(item)
                    vals.append(value)
                    in_trait_data = True
                    break
            if not in_trait_data:
                vals.append("x")

    if mapping_params.get('transform') == "qnorm":
        vals_minus_x = [float(val) for val in vals if val != "x"]
        qnorm_vals = normf(vals_minus_x)
        qnorm_vals_with_x = []
        counter = 0
        for val in vals:
            if val == "x":
                qnorm_vals_with_x.append("x")
            else:
                qnorm_vals_with_x.append(qnorm_vals[counter])
                counter += 1

        vals = qnorm_vals_with_x

    # It seems to take an empty string as default. This should probably be changed.
    covariates = ""

    if mapping_params.get('mapping_method') == "gemma":
        header_row = ["name", "chr", "Mb", "lod_score", "p_value"]
        # gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api
        if mapping_params.get('use_loco') == "True":
            result_markers = gemma_mapping.run_gemma(
                this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0]
        else:
            result_markers = gemma_mapping.run_gemma(
                this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])
    elif mapping_params.get('mapping_method') == "rqtl":
        header_row = ["name", "chr", "cM", "lod_score"]
        if mapping_params['num_perm'] > 0:
            _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl(this_trait.name, vals, samples, dataset, None, "Mb", mapping_params['rqtl_model'],
                                                                                             mapping_params['rqtl_method'], mapping_params['num_perm'], None,
                                                                                             mapping_params['do_control'], mapping_params['control_marker'],
                                                                                             mapping_params['manhattan_plot'], None)
        else:
            result_markers = rqtl_mapping.run_rqtl(this_trait.name, vals, samples, dataset, None, "Mb", mapping_params['rqtl_model'],
                                                   mapping_params['rqtl_method'], mapping_params['num_perm'], None,
                                                   mapping_params['do_control'], mapping_params['control_marker'],
                                                   mapping_params['manhattan_plot'], None)

    if mapping_params.get('limit_to'):
        result_markers = result_markers[:mapping_params['limit_to']]

    if mapping_params.get('format') == "csv":
        output_rows = []
        output_rows.append(header_row)
        for marker in result_markers:
            this_row = [marker[header] for header in header_row]
            output_rows.append(this_row)

        return output_rows, mapping_params['format']
    elif mapping_params['format'] == "json":
        return result_markers, mapping_params['format']
    else:
        return result_markers, None


def initialize_parameters(start_vars, dataset, this_trait):
    mapping_params = {}

    mapping_params['format'] = "json"
    if 'format' in start_vars:
        mapping_params['format'] = start_vars['format']

    mapping_params['limit_to'] = False
    if 'limit_to' in start_vars:
        if start_vars['limit_to'].isdigit():
            mapping_params['limit_to'] = int(start_vars['limit_to'])

    mapping_params['mapping_method'] = "gemma"
    if 'method' in start_vars:
        mapping_params['mapping_method'] = start_vars['method']

    if mapping_params['mapping_method'] == "rqtl":
        mapping_params['rqtl_method'] = "hk"
        mapping_params['rqtl_model'] = "normal"
        mapping_params['do_control'] = False
        mapping_params['control_marker'] = ""
        mapping_params['manhattan_plot'] = True
        mapping_params['pair_scan'] = False
        if 'rqtl_method' in start_vars:
            mapping_params['rqtl_method'] = start_vars['rqtl_method']
        if 'rqtl_model' in start_vars:
            mapping_params['rqtl_model'] = start_vars['rqtl_model']
        if 'control_marker' in start_vars:
            mapping_params['control_marker'] = start_vars['control_marker']
            mapping_params['do_control'] = True
        if 'pair_scan' in start_vars:
            if start_vars['pair_scan'].lower() == "true":
                mapping_params['pair_scan'] = True

        if 'interval_mapping' in start_vars:
            if start_vars['interval_mapping'].lower() == "true":
                mapping_params['manhattan_plot'] = False
        elif 'manhattan_plot' in start_vars:
            if start_vars['manhattan_plot'].lower() != "true":
                mapping_params['manhattan_plot'] = False

    mapping_params['maf'] = 0.01
    if 'maf' in start_vars:
        mapping_params['maf'] = start_vars['maf']  # Minor allele frequency

    mapping_params['use_loco'] = True
    if 'use_loco' in start_vars:
        if (start_vars['use_loco'].lower() == "false") or (start_vars['use_loco'].lower() == "no"):
            mapping_params['use_loco'] = False

    mapping_params['num_perm'] = 0
    mapping_params['perm_check'] = False
    if 'num_perm' in start_vars:
        try:
            mapping_params['num_perm'] = int(start_vars['num_perm'])
            mapping_params['perm_check'] = "ON"
        except:
            mapping_params['perm_check'] = False

    mapping_params['transform'] = False
    if 'transform' in start_vars:
        mapping_params['transform'] = start_vars['transform']

    mapping_params['genofile'] = False
    if 'genofile' in start_vars:
        mapping_params['genofile'] = start_vars['genofile']

    return mapping_params

def get_genofile_samplelist(dataset):
    genofile_samplelist = []

    genofile_json = dataset.group.get_genofiles()
    for genofile in genofile_json:
        if genofile['location'] == dataset.group.genofile and 'sample_list' in genofile:
            genofile_samplelist = genofile['sample_list']

    return genofile_samplelist