about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- wqflask/wqflask/api/mapping.py | 263
-rw-r--r-- wqflask/wqflask/api/router.py | 63
2 files changed, 188 insertions, 138 deletions
diff --git a/wqflask/wqflask/api/mapping.py b/wqflask/wqflask/api/mapping.py
index 83c61796..d830cefc 100644
--- a/wqflask/wqflask/api/mapping.py
+++ b/wqflask/wqflask/api/mapping.py
@@ -1,122 +1,141 @@
-from __future__ import absolute_import, division, print_function

-

-import string

-

-from base import data_set

-from base import webqtlConfig

-from base.trait import GeneralTrait, retrieve_sample_data

-

-from utility import helper_functions

-from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping

-

-import utility.logger

-logger = utility.logger.getLogger(__name__ )

-

-def do_mapping_for_api(start_vars):

-    assert('db' in start_vars)

-    assert('trait_id' in start_vars)

-

-    dataset = data_set.create_dataset(dataset_name = start_vars['db'])

-    dataset.group.get_markers()

-    this_trait = GeneralTrait(dataset = dataset, name = start_vars['trait_id'])

-    this_trait = retrieve_sample_data(this_trait, dataset)

-

-    samples = []

-    vals = []

-

-    for sample in dataset.group.samplelist:

-        in_trait_data = False

-        for item in this_trait.data:

-            if this_trait.data[item].name == sample:

-                value = str(this_trait.data[item].value)

-                samples.append(item)

-                vals.append(value)

-                in_trait_data = True

-                break

-        if not in_trait_data:

-            vals.append("x")

-

-    mapping_params = initialize_parameters(start_vars, dataset, this_trait)

-

-    covariates = "" #ZS: It seems to take an empty string as default. This should probably be changed.

-

-    if mapping_params['mapping_method'] == "gemma":

-        header_row = ["name", "chr", "Mb", "lod_score", "p_value"]

-        if mapping_params['use_loco'] == "True": #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api

-            result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0]

-        else:

-            result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])

-    elif mapping_params['mapping_method'] == "rqtl":

-        header_row = ["name", "chr", "Mb", "lod_score"]

-        if mapping_params['num_perm'] > 0:

-            _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], 

-                                                                                        mapping_params['perm_check'], mapping_params['num_perm'], 

-                                                                                        mapping_params['do_control'], mapping_params['control_marker'], 

-                                                                                        mapping_params['manhattan_plot'], mapping_params['pair_scan'])

-        else:

-            result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], 

-                                                 mapping_params['perm_check'], mapping_params['num_perm'], 

-                                                 mapping_params['do_control'], mapping_params['control_marker'], 

-                                                 mapping_params['manhattan_plot'], mapping_params['pair_scan'])

-

-    output_rows = []

-    output_rows.append(header_row)

-    for marker in result_markers:

-        this_row = [marker[header] for header in header_row]

-        output_rows.append(this_row)

-

-    return output_rows

-

-

-def initialize_parameters(start_vars, dataset, this_trait):

-    mapping_params = {}

-    mapping_params['mapping_method'] = "gemma"

-    if 'method' in start_vars:

-        mapping_params['mapping_method'] = start_vars['method']

-

-    if mapping_params['mapping_method'] == "rqtl":

-        mapping_params['rqtl_method'] = "hk"

-        mapping_params['rqtl_model'] = "normal"

-        mapping_params['do_control'] = False

-        mapping_params['control_marker'] = ""

-        mapping_params['manhattan_plot'] = True

-        mapping_params['pair_scan'] = False

-        if 'rqtl_method' in start_vars:

-            mapping_params['rqtl_method'] = start_vars['rqtl_method']

-        if 'rqtl_model' in start_vars:

-            mapping_params['rqtl_model'] = start_vars['rqtl_model']

-        if 'control_marker' in start_vars:

-            mapping_params['control_marker'] = start_vars['control_marker']

-            mapping_params['do_control'] = True

-        if 'pair_scan' in start_vars:

-            if start_vars['pair_scan'].lower() == "true":

-                mapping_params['pair_scan'] = True

-

-        if 'interval_mapping' in start_vars:

-            if start_vars['interval_mapping'].lower() == "true":

-                mapping_params['manhattan_plot'] = False

-        elif 'manhattan_plot' in start_vars:

-            if start_vars['manhattan_plot'].lower() != "true":

-                mapping_params['manhattan_plot'] = False

-

-    mapping_params['maf'] = 0.01

-    if 'maf' in start_vars:

-        mapping_params['maf'] = start_vars['maf'] # Minor allele frequency

-

-    mapping_params['use_loco'] = False

-    if 'use_loco' in start_vars:

-        if start_vars['use_loco'].lower() != "false":

-            mapping_params['use_loco'] = start_vars['use_loco']

-

-    mapping_params['num_perm'] = 0

-    mapping_params['perm_check'] = False

-    if 'num_perm' in start_vars:

-        try:

-            mapping_params['num_perm'] = int(start_vars['num_perm'])

-            mapping_params['perm_check'] = "ON"

-        except:

-            mapping_params['perm_check'] = False

-

-    return mapping_params

-    

-

+from __future__ import absolute_import, division, print_function
+
+import string
+
+from base import data_set
+from base import webqtlConfig
+from base.trait import GeneralTrait, retrieve_sample_data
+
+from utility import helper_functions
+from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping
+
+import utility.logger
+logger = utility.logger.getLogger(__name__ )
+
+def do_mapping_for_api(start_vars):
+    assert('db' in start_vars)
+    assert('trait_id' in start_vars)
+
+    dataset = data_set.create_dataset(dataset_name = start_vars['db'])
+    dataset.group.get_markers()
+    this_trait = GeneralTrait(dataset = dataset, name = start_vars['trait_id'])
+    this_trait = retrieve_sample_data(this_trait, dataset)
+
+    samples = []
+    vals = []
+
+    for sample in dataset.group.samplelist:
+        in_trait_data = False
+        for item in this_trait.data:
+            if this_trait.data[item].name == sample:
+                value = str(this_trait.data[item].value)
+                samples.append(item)
+                vals.append(value)
+                in_trait_data = True
+                break
+        if not in_trait_data:
+            vals.append("x")
+
+    mapping_params = initialize_parameters(start_vars, dataset, this_trait)
+
+    covariates = "" #ZS: It seems to take an empty string as default. This should probably be changed.
+
+    if mapping_params['mapping_method'] == "gemma":
+        header_row = ["name", "chr", "Mb", "lod_score", "p_value"]
+        if mapping_params['use_loco'] == "True": #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api
+            result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0]
+        else:
+            result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])
+    elif mapping_params['mapping_method'] == "rqtl":
+        header_row = ["name", "chr", "cM", "lod_score"]
+        if mapping_params['num_perm'] > 0:
+            _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'],
+                                                                                        mapping_params['perm_check'], mapping_params['num_perm'],
+                                                                                        mapping_params['do_control'], mapping_params['control_marker'],
+                                                                                        mapping_params['manhattan_plot'], mapping_params['pair_scan'])
+        else:
+            result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'],
+                                                 mapping_params['perm_check'], mapping_params['num_perm'],
+                                                 mapping_params['do_control'], mapping_params['control_marker'],
+                                                 mapping_params['manhattan_plot'], mapping_params['pair_scan'])
+
+    if mapping_params['limit_to']:
+        result_markers = result_markers[:mapping_params['limit_to']]
+
+    if mapping_params['format'] == "csv":
+        output_rows = []
+        output_rows.append(header_row)
+        for marker in result_markers:
+            this_row = [marker[header] for header in header_row]
+            output_rows.append(this_row)
+
+        return output_rows, mapping_params['format']
+    elif mapping_params['format'] == "json":
+        return result_markers, mapping_params['format']
+    else:
+        return result_markers, None
+
+
+
+def initialize_parameters(start_vars, dataset, this_trait):
+    mapping_params = {}
+
+    mapping_params['format'] = "json"
+    if 'format' in start_vars:
+        mapping_params['format'] = start_vars['format']
+
+    mapping_params['limit_to'] = False
+    if 'limit_to' in start_vars:
+        if start_vars['limit_to'].isdigit():
+            mapping_params['limit_to'] = int(start_vars['limit_to'])
+
+    mapping_params['mapping_method'] = "gemma"
+    if 'method' in start_vars:
+        mapping_params['mapping_method'] = start_vars['method']
+
+    if mapping_params['mapping_method'] == "rqtl":
+        mapping_params['rqtl_method'] = "hk"
+        mapping_params['rqtl_model'] = "normal"
+        mapping_params['do_control'] = False
+        mapping_params['control_marker'] = ""
+        mapping_params['manhattan_plot'] = True
+        mapping_params['pair_scan'] = False
+        if 'rqtl_method' in start_vars:
+            mapping_params['rqtl_method'] = start_vars['rqtl_method']
+        if 'rqtl_model' in start_vars:
+            mapping_params['rqtl_model'] = start_vars['rqtl_model']
+        if 'control_marker' in start_vars:
+            mapping_params['control_marker'] = start_vars['control_marker']
+            mapping_params['do_control'] = True
+        if 'pair_scan' in start_vars:
+            if start_vars['pair_scan'].lower() == "true":
+                mapping_params['pair_scan'] = True
+
+        if 'interval_mapping' in start_vars:
+            if start_vars['interval_mapping'].lower() == "true":
+                mapping_params['manhattan_plot'] = False
+        elif 'manhattan_plot' in start_vars:
+            if start_vars['manhattan_plot'].lower() != "true":
+                mapping_params['manhattan_plot'] = False
+
+    mapping_params['maf'] = 0.01
+    if 'maf' in start_vars:
+        mapping_params['maf'] = start_vars['maf'] # Minor allele frequency
+
+    mapping_params['use_loco'] = True
+    if 'use_loco' in start_vars:
+        if (start_vars['use_loco'].lower() == "false") or (start_vars['use_loco'].lower() == "no"):
+            mapping_params['use_loco'] = False
+
+    mapping_params['num_perm'] = 0
+    mapping_params['perm_check'] = False
+    if 'num_perm' in start_vars:
+        try:
+            mapping_params['num_perm'] = int(start_vars['num_perm'])
+            mapping_params['perm_check'] = "ON"
+        except:
+            mapping_params['perm_check'] = False
+
+    return mapping_params
+
+
diff --git a/wqflask/wqflask/api/router.py b/wqflask/wqflask/api/router.py
index 8e2cbbdc..cbff6e83 100644
--- a/wqflask/wqflask/api/router.py
+++ b/wqflask/wqflask/api/router.py
@@ -293,7 +293,7 @@ def get_dataset_info(dataset_name, group_name = None, file_format="json"):
 @app.route("/api/v_{}/traits/<path:dataset_name>".format(version), methods=("GET",))
 @app.route("/api/v_{}/traits/<path:dataset_name>.<path:file_format>".format(version), methods=("GET",))
 def fetch_traits(dataset_name, file_format = "json"):
-    trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name)
+    trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name, request.args)
     if ("ids_only" in request.args) and (len(trait_ids) > 0):
         if file_format == "json":
             filename = dataset_name + "_trait_ids.json"
@@ -367,6 +367,10 @@ def fetch_traits(dataset_name, file_format = "json"):
 
                 field_list = ["Id", "PhenotypeId", "PublicationId", "Locus", "LRS", "Additive", "Sequence"]
 
+            if 'limit_to' in request.args:
+                limit_number = request.args['limit_to']
+                query += "LIMIT " + str(limit_number)
+
             if file_format == "json":
                 filename = dataset_name + "_traits.json"
 
@@ -408,7 +412,7 @@ def fetch_traits(dataset_name, file_format = "json"):
 @app.route("/api/v_{}/sample_data/<path:dataset_name>".format(version))
 @app.route("/api/v_{}/sample_data/<path:dataset_name>.<path:file_format>".format(version))
 def all_sample_data(dataset_name, file_format = "csv"):
-    trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name)
+    trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name, request.args)
 
     if len(trait_ids) > 0:
         sample_list = get_samplelist(dataset_name)
@@ -496,7 +500,7 @@ def all_sample_data(dataset_name, file_format = "csv"):
             output.headers["Content-type"] = "text/csv"
             return output
         else:
-            return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
+            return return_error(code=415, source=request.url_rule.rule, title="Unsupported file format", details="")
     else:
         return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
 
@@ -669,25 +673,35 @@ def get_corr_results():
 
 @app.route("/api/v_{}/mapping".format(version), methods=("GET",))
 def get_mapping_results():
-    results = mapping.do_mapping_for_api(request.args)
+    results, format = mapping.do_mapping_for_api(request.args)
 
     if len(results) > 0:
-        filename = "mapping_" + datetime.datetime.utcnow().strftime("%b_%d_%Y_%I:%M%p") + ".csv"
+        if format == "csv":
+            filename = "mapping_" + datetime.datetime.utcnow().strftime("%b_%d_%Y_%I:%M%p") + ".csv"
 
-        si = StringIO.StringIO()
-        csv_writer = csv.writer(si)
-        csv_writer.writerows(results)
-        output = make_response(si.getvalue())
-        output.headers["Content-Disposition"] = "attachment; filename=" + filename
-        output.headers["Content-type"] = "text/csv"
+            si = StringIO.StringIO()
+            csv_writer = csv.writer(si)
+            csv_writer.writerows(results)
+            output = make_response(si.getvalue())
+            output.headers["Content-Disposition"] = "attachment; filename=" + filename
+            output.headers["Content-type"] = "text/csv"
 
-        return output
+            return output
+        elif format == "json":
+            return flask.jsonify(results)
+        else:
+            return return_error(code=415, source=request.url_rule.rule, title="Unsupported Format", details="")
     else:
         return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
 
 @app.route("/api/v_{}/genotypes/<path:group_name>".format(version))
 @app.route("/api/v_{}/genotypes/<path:group_name>.<path:file_format>".format(version))
 def get_genotypes(group_name, file_format="csv"):
+    limit_num = None
+    if 'limit_to' in request.args:
+        if request.args['limit_to'].isdigit():
+            limit_num = int(request.args['limit_to'])
+
     si = StringIO.StringIO()
     if file_format == "csv" or file_format == "geno":
         filename = group_name + ".geno"
@@ -695,11 +709,15 @@ def get_genotypes(group_name, file_format="csv"):
         if os.path.isfile("{0}/{1}.geno".format(flat_files("genotype"), group_name)):
             output_lines = []
             with open("{0}/{1}.geno".format(flat_files("genotype"), group_name)) as genofile:
+                i = 0
                 for line in genofile:
                     if line[0] == "#" or line[0] == "@":
                         output_lines.append([line.strip()])
                     else:
+                        if i >= limit_num:
+                            break
                         output_lines.append(line.split())
+                        i += 1
 
             csv_writer = csv.writer(si, delimiter = "\t", escapechar = "\\", quoting = csv.QUOTE_NONE)
         else:
@@ -710,8 +728,12 @@ def get_genotypes(group_name, file_format="csv"):
         if os.path.isfile("{0}/{1}.geno".format(flat_files("genotype"), group_name)):
             output_lines = []
             with open("{0}/{1}_geno.txt".format(flat_files("genotype/bimbam"), group_name)) as genofile:
+                i = 0
                 for line in genofile:
+                    if i >= limit_num:
+                        break
                     output_lines.append([line.strip() for line in line.split(",")])
+                    i += 1
 
             csv_writer = csv.writer(si, delimiter = ",")
         else:
@@ -736,7 +758,13 @@ def return_error(code, source, title, details):
 
     return flask.jsonify(json_ob)
 
-def get_dataset_trait_ids(dataset_name):
+def get_dataset_trait_ids(dataset_name, start_vars):
+
+    if 'limit_to' in start_vars:
+        limit_string = "LIMIT " + str(start_vars['limit_to'])
+    else:
+        limit_string = ""
+
     if "Geno" in dataset_name:
         data_type = "Geno" #ZS: Need to pass back the dataset type
         query =    """
@@ -748,7 +776,8 @@ def get_dataset_trait_ids(dataset_name):
                                 Geno.Id = GenoXRef.GenoId AND
                                 GenoXRef.GenoFreezeId = GenoFreeze.Id AND
                                 GenoFreeze.Name = "{0}"
-                        """.format(dataset_name)
+                            {1}
+                        """.format(dataset_name, limit_string)
 
         results = g.db.execute(query).fetchall()
 
@@ -769,7 +798,8 @@ def get_dataset_trait_ids(dataset_name):
                              PublishXRef
                          WHERE
                              PublishXRef.InbredSetId = "{0}"
-                      """.format(dataset_id)
+                         {1}
+                      """.format(dataset_id, limit_string)
 
         results = g.db.execute(query).fetchall()
 
@@ -788,7 +818,8 @@ def get_dataset_trait_ids(dataset_name):
                             ProbeSet.Id = ProbeSetXRef.ProbeSetId AND
                             ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                             ProbeSetFreeze.Name = "{0}"
-                     """.format(dataset_name)
+                        {1}
+                     """.format(dataset_name, limit_string)
 
         results = g.db.execute(query).fetchall()