about summary refs log tree commit diff
diff options
context:
space:
mode:
authorzsloan2019-05-24 11:36:08 -0500
committerzsloan2019-05-24 11:36:08 -0500
commit1217563ede4aef48b124613ea852c4db8803e6b4 (patch)
treec5eb40da3404af493115db1af370ff3147dc611e
parente304c2cfdf1b38da483efa607a2acd72b66ad0de (diff)
downloadgenenetwork2-1217563ede4aef48b124613ea852c4db8803e6b4.tar.gz
Added currently api progress, which will henceforth be appropriately committed in this branch
-rw-r--r--wqflask/wqflask/__init__.py1
-rw-r--r--wqflask/wqflask/api/__init__.py0
-rw-r--r--wqflask/wqflask/api/correlation.py237
-rw-r--r--wqflask/wqflask/api/mapping.py122
-rw-r--r--wqflask/wqflask/api/router.py747
5 files changed, 1107 insertions, 0 deletions
diff --git a/wqflask/wqflask/__init__.py b/wqflask/wqflask/__init__.py
index bc8e9900..399e794d 100644
--- a/wqflask/wqflask/__init__.py
+++ b/wqflask/wqflask/__init__.py
@@ -22,3 +22,4 @@ app.jinja_env.globals.update(
 )
 
 import wqflask.views
+from wqflask.api import router
\ No newline at end of file
diff --git a/wqflask/wqflask/api/__init__.py b/wqflask/wqflask/api/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/wqflask/wqflask/api/__init__.py
diff --git a/wqflask/wqflask/api/correlation.py b/wqflask/wqflask/api/correlation.py
new file mode 100644
index 00000000..66eb94ac
--- /dev/null
+++ b/wqflask/wqflask/api/correlation.py
@@ -0,0 +1,237 @@
+from __future__ import absolute_import, division, print_function

+

+import collections

+

+import scipy

+

+from MySQLdb import escape_string as escape

+

+from flask import g

+

+from base import data_set

+from base.trait import GeneralTrait, retrieve_sample_data

+

+from wqflask.correlation.show_corr_results import generate_corr_json

+from wqflask.correlation import correlation_functions

+

+from utility import webqtlUtil, helper_functions, corr_result_helpers

+from utility.benchmark import Bench

+

+import utility.logger

+logger = utility.logger.getLogger(__name__ )

+

+def do_correlation(start_vars):

+    assert('db' in start_vars)

+    assert('target_db' in start_vars)

+    assert('trait_id' in start_vars)

+

+    this_dataset = data_set.create_dataset(dataset_name = start_vars['db'])

+    target_dataset = data_set.create_dataset(dataset_name = start_vars['target_db'])

+    this_trait = GeneralTrait(dataset = this_dataset, name = start_vars['trait_id'])

+    this_trait = retrieve_sample_data(this_trait, this_dataset)

+

+    corr_params = init_corr_params(start_vars)

+

+    corr_results = calculate_results(this_trait, this_dataset, target_dataset, corr_params)

+    #corr_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0])))

+

+    final_results = []

+    for _trait_counter, trait in enumerate(corr_results.keys()[:corr_params['return_count']]):

+        if corr_params['type'] == "tissue":

+            [sample_r, num_overlap, sample_p, symbol] = corr_results[trait]

+            result_dict = {

+                "trait"     : trait,

+                "sample_r"  : sample_r,

+                "#_strains" : num_overlap,

+                "p_value"   : sample_p,

+                "symbol"    : symbol

+            }

+        elif corr_params['type'] == "literature" or corr_params['type'] == "lit":

+            [gene_id, sample_r] = corr_results[trait]

+            result_dict = {

+                "trait"     : trait,

+                "sample_r"  : sample_r,

+                "gene_id"   : gene_id

+            }

+        else:

+            [sample_r, sample_p, num_overlap] = corr_results[trait]

+            result_dict = {

+                "trait"     : trait,

+                "sample_r"  : sample_r,

+                "#_strains" : num_overlap,

+                "p_value"   : sample_p

+            }

+

+        final_results.append(result_dict)

+

+    # json_corr_results = generate_corr_json(final_corr_results, this_trait, this_dataset, target_dataset, for_api = True)

+

+    return final_results

+

+def calculate_results(this_trait, this_dataset, target_dataset, corr_params):

+    corr_results = {}

+

+    target_dataset.get_trait_data()

+

+    if corr_params['type'] == "tissue":

+        trait_symbol_dict = this_dataset.retrieve_genes("Symbol")

+        corr_results = do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params)

+        sorted_results = collections.OrderedDict(sorted(corr_results.items(),

+                                                        key=lambda t: -abs(t[1][1])))

+    elif corr_params['type'] == "literature" or corr_params['type'] == "lit": #ZS: Just so a user can use either "lit" or "literature"

+        trait_geneid_dict = this_dataset.retrieve_genes("GeneId")

+        corr_results = do_literature_correlation_for_all_traits(this_trait, this_dataset, trait_geneid_dict, corr_params)

+        sorted_results = collections.OrderedDict(sorted(corr_results.items(),

+                                                 key=lambda t: -abs(t[1][1])))

+    else:

+        for target_trait, target_vals in target_dataset.trait_data.iteritems():

+            result = get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, corr_params['type'])

+            if result is not None:

+                corr_results[target_trait] = result

+

+        sorted_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0])))

+

+    return sorted_results

+

+def do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params, tissue_dataset_id=1):

+    #Gets tissue expression values for the primary trait

+    primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list = [this_trait.symbol])

+

+    if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:

+        primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower()]

+

+        corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list=trait_symbol_dict.values())

+

+        tissue_corr_data = {}

+        for trait, symbol in trait_symbol_dict.iteritems():

+            if symbol and symbol.lower() in corr_result_tissue_vals_dict:

+                this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()]

+

+                result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values,

+                                                                            this_trait_tissue_values,

+                                                                            corr_params['method'])

+

+                tissue_corr_data[trait] = [result[0], result[1], result[2], symbol]

+

+        return tissue_corr_data

+

+def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params):

+    input_trait_mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), this_trait.geneid)

+

+    lit_corr_data = {}

+    for trait, gene_id in trait_geneid_dict.iteritems():

+        mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), gene_id)

+

+        if mouse_gene_id and str(mouse_gene_id).find(";") == -1:

+            result = g.db.execute(

+                """SELECT value

+                    FROM LCorrRamin3

+                    WHERE GeneId1='%s' and

+                            GeneId2='%s'

+                """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id))

+            ).fetchone()

+            if not result:

+                result = g.db.execute("""SELECT value

+                    FROM LCorrRamin3

+                    WHERE GeneId2='%s' and

+                            GeneId1='%s'

+                """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id))

+                ).fetchone()

+            if result:

+                lit_corr = result.value

+                lit_corr_data[trait] = [gene_id, lit_corr]

+            else:

+                lit_corr_data[trait] = [gene_id, 0]

+        else:

+            lit_corr_data[trait] = [gene_id, 0]

+

+    return lit_corr_data

+

+def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, type):

+    """

+    Calculates the sample r (or rho) and p-value

+

+    Given a primary trait and a target trait's sample values,

+    calculates either the pearson r or spearman rho and the p-value

+    using the corresponding scipy functions.

+    """

+

+    this_trait_vals = []

+    shared_target_vals = []

+    for i, sample in enumerate(target_dataset.group.samplelist):

+        if sample in this_trait.data:

+            this_sample_value = this_trait.data[sample].value

+            target_sample_value = target_vals[i]

+            this_trait_vals.append(this_sample_value)

+            shared_target_vals.append(target_sample_value)

+

+    this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, shared_target_vals)

+

+    if type == 'pearson':

+        sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, shared_target_vals)

+    else:

+        sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, shared_target_vals)

+

+    if num_overlap > 5:

+        if scipy.isnan(sample_r):

+            return None

+        else:

+            return [sample_r, sample_p, num_overlap]

+

+def convert_to_mouse_gene_id(species=None, gene_id=None):

+    """If the species is rat or human, translate the gene_id to the mouse geneid

+

+    If there is no input gene_id or there's no corresponding mouse gene_id, return None

+

+    """

+    if not gene_id:

+        return None

+

+    mouse_gene_id = None

+

+    if species == 'mouse':

+        mouse_gene_id = gene_id

+

+    elif species == 'rat':

+

+        query = """SELECT mouse

+                FROM GeneIDXRef

+                WHERE rat='%s'""" % escape(gene_id)

+

+        result = g.db.execute(query).fetchone()

+        if result != None:

+            mouse_gene_id = result.mouse

+

+    elif species == 'human':

+

+        query = """SELECT mouse

+                FROM GeneIDXRef

+                WHERE human='%s'""" % escape(gene_id)

+

+        result = g.db.execute(query).fetchone()

+        if result != None:

+            mouse_gene_id = result.mouse

+

+    return mouse_gene_id

+

+def init_corr_params(start_vars):

+    method = "pearson"

+    if 'method' in start_vars:

+        method = start_vars['method']

+

+    type = "sample"

+    if 'type' in start_vars:

+        type = start_vars['type']

+

+    return_count = 500

+    if 'return_count' in start_vars:

+        assert(start_vars['return_count'].isdigit())

+        return_count = int(start_vars['return_count'])

+

+    corr_params = {

+        'method'       : method,

+        'type'         : type,

+        'return_count' : return_count

+    }

+

+    return corr_params
\ No newline at end of file
diff --git a/wqflask/wqflask/api/mapping.py b/wqflask/wqflask/api/mapping.py
new file mode 100644
index 00000000..83c61796
--- /dev/null
+++ b/wqflask/wqflask/api/mapping.py
@@ -0,0 +1,122 @@
+from __future__ import absolute_import, division, print_function

+

+import string

+

+from base import data_set

+from base import webqtlConfig

+from base.trait import GeneralTrait, retrieve_sample_data

+

+from utility import helper_functions

+from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping

+

+import utility.logger

+logger = utility.logger.getLogger(__name__ )

+

+def do_mapping_for_api(start_vars):

+    assert('db' in start_vars)

+    assert('trait_id' in start_vars)

+

+    dataset = data_set.create_dataset(dataset_name = start_vars['db'])

+    dataset.group.get_markers()

+    this_trait = GeneralTrait(dataset = dataset, name = start_vars['trait_id'])

+    this_trait = retrieve_sample_data(this_trait, dataset)

+

+    samples = []

+    vals = []

+

+    for sample in dataset.group.samplelist:

+        in_trait_data = False

+        for item in this_trait.data:

+            if this_trait.data[item].name == sample:

+                value = str(this_trait.data[item].value)

+                samples.append(item)

+                vals.append(value)

+                in_trait_data = True

+                break

+        if not in_trait_data:

+            vals.append("x")

+

+    mapping_params = initialize_parameters(start_vars, dataset, this_trait)

+

+    covariates = "" #ZS: It seems to take an empty string as default. This should probably be changed.

+

+    if mapping_params['mapping_method'] == "gemma":

+        header_row = ["name", "chr", "Mb", "lod_score", "p_value"]

+        if mapping_params['use_loco'] == "True": #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api

+            result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0]

+        else:

+            result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])

+    elif mapping_params['mapping_method'] == "rqtl":

+        header_row = ["name", "chr", "Mb", "lod_score"]

+        if mapping_params['num_perm'] > 0:

+            _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], 

+                                                                                        mapping_params['perm_check'], mapping_params['num_perm'], 

+                                                                                        mapping_params['do_control'], mapping_params['control_marker'], 

+                                                                                        mapping_params['manhattan_plot'], mapping_params['pair_scan'])

+        else:

+            result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], 

+                                                 mapping_params['perm_check'], mapping_params['num_perm'], 

+                                                 mapping_params['do_control'], mapping_params['control_marker'], 

+                                                 mapping_params['manhattan_plot'], mapping_params['pair_scan'])

+

+    output_rows = []

+    output_rows.append(header_row)

+    for marker in result_markers:

+        this_row = [marker[header] for header in header_row]

+        output_rows.append(this_row)

+

+    return output_rows

+

+

+def initialize_parameters(start_vars, dataset, this_trait):

+    mapping_params = {}

+    mapping_params['mapping_method'] = "gemma"

+    if 'method' in start_vars:

+        mapping_params['mapping_method'] = start_vars['method']

+

+    if mapping_params['mapping_method'] == "rqtl":

+        mapping_params['rqtl_method'] = "hk"

+        mapping_params['rqtl_model'] = "normal"

+        mapping_params['do_control'] = False

+        mapping_params['control_marker'] = ""

+        mapping_params['manhattan_plot'] = True

+        mapping_params['pair_scan'] = False

+        if 'rqtl_method' in start_vars:

+            mapping_params['rqtl_method'] = start_vars['rqtl_method']

+        if 'rqtl_model' in start_vars:

+            mapping_params['rqtl_model'] = start_vars['rqtl_model']

+        if 'control_marker' in start_vars:

+            mapping_params['control_marker'] = start_vars['control_marker']

+            mapping_params['do_control'] = True

+        if 'pair_scan' in start_vars:

+            if start_vars['pair_scan'].lower() == "true":

+                mapping_params['pair_scan'] = True

+

+        if 'interval_mapping' in start_vars:

+            if start_vars['interval_mapping'].lower() == "true":

+                mapping_params['manhattan_plot'] = False

+        elif 'manhattan_plot' in start_vars:

+            if start_vars['manhattan_plot'].lower() != "true":

+                mapping_params['manhattan_plot'] = False

+

+    mapping_params['maf'] = 0.01

+    if 'maf' in start_vars:

+        mapping_params['maf'] = start_vars['maf'] # Minor allele frequency

+

+    mapping_params['use_loco'] = False

+    if 'use_loco' in start_vars:

+        if start_vars['use_loco'].lower() != "false":

+            mapping_params['use_loco'] = start_vars['use_loco']

+

+    mapping_params['num_perm'] = 0

+    mapping_params['perm_check'] = False

+    if 'num_perm' in start_vars:

+        try:

+            mapping_params['num_perm'] = int(start_vars['num_perm'])

+            mapping_params['perm_check'] = "ON"

+        except:

+            mapping_params['perm_check'] = False

+

+    return mapping_params

+    

+

diff --git a/wqflask/wqflask/api/router.py b/wqflask/wqflask/api/router.py
new file mode 100644
index 00000000..efc817ea
--- /dev/null
+++ b/wqflask/wqflask/api/router.py
@@ -0,0 +1,747 @@
+# GN2 API
+
+from __future__ import absolute_import, division, print_function
+
+import os, io, csv, json, datetime
+
+import StringIO
+
+import flask
+from flask import g, Response, request, make_response, render_template, send_from_directory, jsonify, redirect
+import sqlalchemy
+from wqflask import app
+
+from wqflask.api import correlation, mapping
+
+from utility.tools import flat_files
+
+import utility.logger
+logger = utility.logger.getLogger(__name__ )
+
+version = "pre1"
+
+@app.route("/api/v_{}/".format(version))
+def hello_world():
+    return flask.jsonify({'hello':'world'})
+
+@app.route("/api/v_{}/species".format(version))
+def get_species_list():
+    results = g.db.execute("SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;")
+    the_species = results.fetchall()
+    species_list = []
+    for species in the_species:
+        species_dict = {
+          'Id'         : species[0],
+          'Name'       : species[1],
+          'FullName'   : species[2],
+          'TaxonomyId' : species[3]
+        }
+        species_list.append(species_dict)
+
+    return flask.jsonify(species_list)
+
+@app.route("/api/v_{}/species/<path:species_name>".format(version))
+@app.route("/api/v_{}/species/<path:species_name>.<path:file_format>".format(version))
+def get_species_info(species_name, file_format = "json"):
+    results = g.db.execute("""SELECT SpeciesId, Name, FullName, TaxonomyId 
+                              FROM Species 
+                              WHERE (Name='{0}' OR FullName='{0}' OR SpeciesName='{0}');""".format(species_name))
+
+    the_species = results.fetchone()
+    species_dict = { 
+      'Id'         : the_species[0],
+      'Name'       : the_species[1],
+      'FullName'   : the_species[2],
+      'TaxonomyId' : the_species[3]
+    }
+    
+    return flask.jsonify(species_dict)
+
+@app.route("/api/v_{}/groups".format(version))
+@app.route("/api/v_{}/<path:species_name>/groups".format(version))
+def get_groups_list(species_name=None):
+    if species_name:
+        results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, 
+                                         InbredSet.Name, InbredSet.FullName, InbredSet.public, 
+                                         InbredSet.MappingMethodId, InbredSet.GeneticType
+                                  FROM InbredSet, Species
+                                  WHERE InbredSet.SpeciesId = Species.Id AND
+                                        (Species.Name = '{0}' OR 
+                                         Species.FullName='{0}' OR 
+                                         Species.SpeciesName='{0}');""".format(species_name))
+    else:
+        results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, 
+                                         InbredSet.Name, InbredSet.FullName, InbredSet.public, 
+                                         InbredSet.MappingMethodId, InbredSet.GeneticType
+                                  FROM InbredSet;""")
+
+    the_groups = results.fetchall()
+    if the_groups:
+        groups_list = []
+        for group in the_groups:
+            group_dict = {
+              'Id'              : group[0],
+              'SpeciesId'       : group[1],
+              'DisplayName'     : group[2],
+              'Name'            : group[3],
+              'FullName'        : group[4],
+              'public'          : group[5],
+              'MappingMethodId' : group[6],
+              'GeneticType'     : group[7]
+            }
+            groups_list.append(group_dict)
+
+        return flask.jsonify(groups_list)
+    else:
+        return render_template("/api/no_results.html")
+
+@app.route("/api/v_{}/group/<path:group_name>".format(version))
+@app.route("/api/v_{}/group/<path:group_name>.<path:file_format>".format(version))
+@app.route("/api/v_{}/group/<path:species_name>/<path:group_name>".format(version))
+@app.route("/api/v_{}/group/<path:species_name>/<path:group_name>.<path:file_format>".format(version))
+def get_group_info(group_name, species_name = None, file_format = "json"):
+    if species_name:
+        results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, 
+                                         InbredSet.Name, InbredSet.FullName, InbredSet.public, 
+                                         InbredSet.MappingMethodId, InbredSet.GeneticType
+                                  FROM InbredSet, Species
+                                  WHERE InbredSet.SpeciesId = Species.Id AND
+                                        (InbredSet.InbredSetName = '{0}' OR
+                                         InbredSet.Name = '{0}' OR
+                                         InbredSet.FullName = '{0}') AND
+                                        (Species.Name = '{1}' OR 
+                                         Species.FullName='{1}' OR 
+                                         Species.SpeciesName='{1}');""".format(group_name, species_name))
+    else:
+        results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, 
+                                         InbredSet.Name, InbredSet.FullName, InbredSet.public, 
+                                         InbredSet.MappingMethodId, InbredSet.GeneticType
+                                  FROM InbredSet
+                                  WHERE (InbredSet.InbredSetName = '{0}' OR
+                                         InbredSet.Name = '{0}' OR
+                                         InbredSet.FullName = '{0}');""".format(group_name))
+
+    group = results.fetchone()
+    if group:
+        group_dict = {
+          'Id'              : group[0],
+          'SpeciesId'       : group[1],
+          'DisplayName'     : group[2],
+          'Name'            : group[3],
+          'FullName'        : group[4],
+          'public'          : group[5],
+          'MappingMethodId' : group[6],
+          'GeneticType'     : group[7]
+        }
+
+        return flask.jsonify(group_dict)
+    else:
+        return render_template("/api/no_results.html")
+
+@app.route("/api/v_{}/datasets/<path:group_name>".format(version))
+@app.route("/api/v_{}/datasets/<path:species_name>/<path:group_name>".format(version))
+def get_datasets_for_group(group_name, species_name=None):
+    if species_name:
+        results = g.db.execute("""
+                                  SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, ProbeSetFreeze.AvgID,
+                                         ProbeSetFreeze.Name, ProbeSetFreeze.Name2, ProbeSetFreeze.FullName,
+                                         ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, ProbeSetFreeze.public,
+                                         ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale
+                                  FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Species
+                                  WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
+                                        ProbeFreeze.InbredSetId = InbredSet.Id AND
+                                        (InbredSet.Name = '{0}' OR InbredSet.InbredSetName = '{0}' OR InbredSet.FullName = '{0}') AND
+                                        InbredSet.SpeciesId = Species.Id AND
+                                        (Species.SpeciesName = '{1}' OR Species.MenuName = '{1}' OR Species.FullName = '{1}');
+                               """.format(group_name, species_name))
+    else:
+        results = g.db.execute("""
+                                  SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, ProbeSetFreeze.AvgID,
+                                         ProbeSetFreeze.Name, ProbeSetFreeze.Name2, ProbeSetFreeze.FullName,
+                                         ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, ProbeSetFreeze.public,
+                                         ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale
+                                  FROM ProbeSetFreeze, ProbeFreeze, InbredSet
+                                  WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
+                                        ProbeFreeze.InbredSetId = InbredSet.Id AND
+                                        (InbredSet.Name = '{0}' OR InbredSet.InbredSetName = '{0}' OR InbredSet.FullName = '{0}');
+                               """.format(group_name))
+
+    the_datasets = results.fetchall()
+
+    if the_datasets:
+        datasets_list = []
+        for dataset in the_datasets:
+            dataset_dict = {
+              'Id'                 : dataset[0],
+              'ProbeFreezeId'      : dataset[1],
+              'AvgID'              : dataset[2],
+              'Short_Abbreviation' : dataset[3],
+              'Long_Abbreviation'  : dataset[4],
+              'FullName'           : dataset[5],
+              'ShortName'          : dataset[6],
+              'CreateTime'         : dataset[7],
+              'public'             : dataset[8],
+              'confidentiality'    : dataset[9],
+              'DataScale'          : dataset[10]
+            }
+            datasets_list.append(dataset_dict)
+
+        return flask.jsonify(datasets_list)
+    else:
+        return render_template("/api/no_results.html")
+
+@app.route("/api/v_{}/dataset/<path:dataset_name>".format(version))
+@app.route("/api/v_{}/dataset/<path:dataset_name>.<path:file_format>".format(version))
+@app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>".format(version))
+@app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>.<path:file_format>".format(version))
+def get_dataset_info(dataset_name, group_name = None, file_format="json"):
+    #ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets
+
+    datasets_list = [] #ZS: I figure I might as well return a list if there are multiple matches, though I don't know if this will actually happen in practice
+
+    probeset_query = """
+                SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName,
+                       ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, ProbeFreeze.TissueId,
+                       Tissue.Name, ProbeSetFreeze.public, ProbeSetFreeze.confidentiality
+                FROM ProbeSetFreeze, ProbeFreeze, Tissue
+            """
+
+    where_statement = """
+                         WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
+                               ProbeFreeze.TissueId = Tissue.Id AND
+                      """
+    if dataset_name.isdigit():
+        where_statement += """
+                              ProbeSetFreeze.Id = '{}'
+                           """.format(dataset_name)
+    else:
+        where_statement += """
+                              (ProbeSetFreeze.Name = '{0}' OR ProbeSetFreeze.Name2 = '{0}' OR
+                              ProbeSetFreeze.FullName = '{0}' OR ProbeSetFreeze.ShortName = '{0}')
+                           """.format(dataset_name)
+
+    probeset_query += where_statement
+    probeset_results = g.db.execute(probeset_query)
+    dataset = probeset_results.fetchone()
+
+    if dataset:
+        dataset_dict = {
+          'dataset_type' : "mRNA expression",
+          'id'           : dataset[0],
+          'name'         : dataset[1],
+          'full_name'    : dataset[2],
+          'short_name'   : dataset[3],
+          'data_scale'   : dataset[4],
+          'tissue_id'    : dataset[5],
+          'tissue'       : dataset[6],
+          'public'       : dataset[7],
+          'confidential' : dataset[8]
+        }
+
+        datasets_list.append(dataset_dict)
+
+    if group_name:
+        pheno_query = """
+                         SELECT PublishXRef.Id, Phenotype.Post_publication_abbreviation, Phenotype.Post_publication_description,
+                                Phenotype.Pre_publication_abbreviation, Phenotype.Pre_publication_description,
+                                Publication.PubMed_ID, Publication.Title, Publication.Year
+                         FROM PublishXRef, Phenotype, Publication, InbredSet
+                         WHERE PublishXRef.InbredSetId = InbredSet.Id AND
+                               PublishXRef.PhenotypeId = Phenotype.Id AND
+                               PublishXRef.PublicationId = Publication.Id AND
+                               InbredSet.Name = '{0}' AND PublishXRef.Id = '{1}'
+                      """.format(group_name, dataset_name)
+
+        logger.debug("QUERY:", pheno_query)
+
+        pheno_results = g.db.execute(pheno_query)
+        dataset = pheno_results.fetchone()
+
+        if dataset:
+            if dataset[5]:
+                dataset_dict = {
+                  'dataset_type' : "phenotype",
+                  'id'           : dataset[0],
+                  'name'         : dataset[1],
+                  'description'  : dataset[2],
+                  'pubmed_id'    : dataset[5],
+                  'title'        : dataset[6],
+                  'year'         : dataset[7]
+                }
+            elif dataset[4]:
+                dataset_dict = {
+                  'dataset_type' : "phenotype",
+                  'id'           : dataset[0],
+                  'name'         : dataset[3],
+                  'description'  : dataset[4]
+                }
+            else:
+                dataset_dict = {
+                  'dataset_type' : "phenotype",
+                  'id'           : dataset[0]
+                }
+
+            datasets_list.append(dataset_dict)
+
+    if len(datasets_list) > 1:
+        return flask.jsonify(datasets_list)
+    elif len(datasets_list) == 1:
+        return flask.jsonify(dataset_dict)
+    else:
+        return render_template("/api/no_results.html")
+
+
+@app.route("/api/v_{}/sample_data/<path:dataset_name>".format(version))
+@app.route("/api/v_{}/sample_data/<path:dataset_name>.<path:file_format>".format(version))
+def all_sample_data(dataset_name, file_format = "csv"):
+    trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name)
+
+    if len(trait_ids) > 0:
+        sample_list = get_samplelist(dataset_name)
+
+        if data_type == "ProbeSet":
+            query = """
+                        SELECT
+                            Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error
+                        FROM
+                            (ProbeSetData, Strain, ProbeSetXRef)
+                        LEFT JOIN ProbeSetSE ON
+                            (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
+                        WHERE
+                            ProbeSetXRef.ProbeSetFreezeId = '{0}' AND
+                            ProbeSetXRef.ProbeSetId = '{1}' AND
+                            ProbeSetXRef.DataId = ProbeSetData.Id AND
+                            ProbeSetData.StrainId = Strain.Id
+                        ORDER BY
+                            Strain.Name
+                    """
+        elif data_type == "Geno":
+            query = """
+                        SELECT
+                            Strain.Name, Strain.Name2, GenoData.value, GenoData.Id, GenoSE.error
+                        FROM
+                            (GenoData, Strain, GenoXRef)
+                        LEFT JOIN GenoSE ON
+                            (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
+                        WHERE
+                            GenoXRef.GenoFreezeId = '{0}' AND
+                            GenoXRef.GenoId = '{1}' AND
+                            GenoXRef.DataId = GenoData.Id AND
+                            GenoData.StrainId = Strain.Id
+                        ORDER BY
+                            Strain.Name
+                    """
+        else:
+            query = """
+                        SELECT
+                            Strain.Name, Strain.Name2, PublishData.value, PublishData.Id, PublishSE.error, NStrain.count
+                        FROM
+                            (PublishData, Strain, PublishXRef)
+                        LEFT JOIN PublishSE ON
+                            (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
+                        LEFT JOIN NStrain ON
+                            (NStrain.DataId = PublishData.Id AND
+                            NStrain.StrainId = PublishData.StrainId)
+                        WHERE
+                            PublishXRef.InbredSetId = '{0}' AND
+                            PublishXRef.PhenotypeId = '{1}' AND
+                            PublishData.Id = PublishXRef.DataId AND
+                            PublishData.StrainId = Strain.Id
+                        ORDER BY
+                            Strain.Name
+                    """
+
+        if file_format == "csv":
+            filename = dataset_name + "_sample_data.csv"
+
+            results_list = []
+            header_list = []
+            header_list.append("Trait ID")
+            header_list += sample_list
+            results_list.append(header_list)
+            for i, trait_id in enumerate(trait_ids):
+                line_list = []
+                line_list.append(str(trait_names[i]))
+                final_query = query.format(dataset_id, trait_id)
+                results = g.db.execute(final_query).fetchall()
+                results_dict = {}
+                for item in results:
+                    results_dict[item[0]] = item[2]
+                for sample in sample_list:
+                    if sample in results_dict:
+                        line_list.append(results_dict[sample])
+                    else:
+                        line_list.append("x")
+                results_list.append(line_list)
+
+            si = StringIO.StringIO()
+            csv_writer = csv.writer(si)
+            csv_writer.writerows(results_list)
+            output = make_response(si.getvalue())
+            output.headers["Content-Disposition"] = "attachment; filename=" + filename
+            output.headers["Content-type"] = "text/csv"
+            return output
+        else:
+            return render_template("/api/no_results.html")
+    else:
+        return render_template("/api/no_results.html")
+
+@app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>".format(version))
+@app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version))
+def trait_sample_data(dataset_name, trait_name, file_format = "json"):
+    probeset_query = """
+                        SELECT
+                            Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error
+                        FROM
+                            (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+                        LEFT JOIN ProbeSetSE ON
+                            (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
+                        WHERE
+                            ProbeSet.Name = '{0}' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                            ProbeSetFreeze.Name = '{1}' AND
+                            ProbeSetXRef.DataId = ProbeSetData.Id AND
+                            ProbeSetData.StrainId = Strain.Id
+                        ORDER BY
+                            Strain.Name
+                     """.format(trait_name, dataset_name)
+
+    probeset_results = g.db.execute(probeset_query)
+
+    sample_data = probeset_results.fetchall()
+    if len(sample_data) > 0:
+        sample_list = []
+        for sample in sample_data:
+            sample_dict = {
+              'sample_name'   : sample[0],
+              'sample_name_2' : sample[1],
+              'value'         : sample[2],
+              'data_id'       : sample[3],
+            }
+            if sample[4]:
+                sample_dict['se'] = sample[4]
+            sample_list.append(sample_dict)
+
+        return flask.jsonify(sample_list)
+    else:
+        if not dataset_name.isdigit():
+            group_id = get_group_id(dataset_name)
+            if group_id:
+                dataset_or_group = group_id
+            else:
+                dataset_or_group = dataset_name
+        else:
+            dataset_or_group = dataset_name
+
+        pheno_query = """
+                         SELECT
+                             Strain.Name, Strain.Name2, PublishData.value, PublishData.Id, PublishSE.error, NStrain.count
+                         FROM
+                             (PublishData, Strain, PublishXRef, PublishFreeze)
+                         LEFT JOIN PublishSE ON
+                             (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
+                         LEFT JOIN NStrain ON
+                             (NStrain.DataId = PublishData.Id AND
+                             NStrain.StrainId = PublishData.StrainId)
+                         WHERE
+                             PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+                             PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = '{1}' AND
+                             (PublishFreeze.Id = '{0}' OR PublishFreeze.Name = '{0}' OR 
+                              PublishFreeze.ShortName = '{0}' OR PublishXRef.InbredSetId = '{0}') AND 
+                             PublishData.StrainId = Strain.Id
+                         ORDER BY
+                             Strain.Name
+                      """.format(dataset_or_group, trait_name)
+
+        pheno_results = g.db.execute(pheno_query)
+
+        sample_data = pheno_results.fetchall()
+        if len(sample_data) > 0:
+            sample_list = []
+            for sample in sample_data:
+                sample_dict = {
+                  'sample_name'   : sample[0],
+                  'sample_name_2' : sample[1],
+                  'value'         : sample[2],
+                  'data_id'       : sample[3]
+                }
+                if sample[4]:
+                    sample_dict['se'] = sample[4]
+                if sample[5]:
+                    sample_dict['n_cases'] = sample[5]
+                sample_list.append(sample_dict)
+
+            return flask.jsonify(sample_list)
+        else:
+            return render_template("/api/no_results.html") 
+
+@app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>".format(version))
+@app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version))
+@app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>".format(version))
+@app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version))
+def get_trait_info(dataset_name, trait_name, file_format = "json"):
+    probeset_query = """
+                        SELECT
+                            ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, ProbeSet.alias, 
+                            ProbeSetXRef.mean, ProbeSetXRef.se, ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.additive
+                        FROM
+                            ProbeSet, ProbeSetXRef, ProbeSetFreeze
+                        WHERE
+                            ProbeSet.Name = '{0}' AND 
+                            ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                            ProbeSetFreeze.Name = '{1}'
+                     """.format(trait_name, dataset_name)
+
+    probeset_results = g.db.execute(probeset_query)
+
+    trait_info = probeset_results.fetchone()
+    if trait_info:
+        trait_dict = {
+            'id'          : trait_info[0],
+            'name'        : trait_info[1],
+            'symbol'      : trait_info[2],
+            'description' : trait_info[3],
+            'chr'         : trait_info[4],
+            'mb'          : trait_info[5],
+            'alias'       :trait_info[6],
+            'mean'        : trait_info[7],
+            'se'          : trait_info[8],
+            'locus'       : trait_info[9],
+            'lrs'         : trait_info[10],
+            'p_value'     : trait_info[11],
+            'additive'    : trait_info[12]
+        }
+
+        return flask.jsonify(trait_dict)
+    else:
+        if "Publish" in dataset_name: #ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish"
+            dataset_name = dataset_name.replace("Publish", "")
+        
+        group_id = get_group_id(dataset_name)
+        pheno_query = """
+                         SELECT
+                             PublishXRef.PhenotypeId, PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive
+                         FROM
+                             PublishXRef
+                         WHERE
+                             PublishXRef.Id = '{0}' AND
+                             PublishXRef.InbredSetId = '{1}'
+                      """.format(trait_name, group_id)
+        
+        logger.debug("QUERY:", pheno_query)
+
+        pheno_results = g.db.execute(pheno_query)
+
+        trait_info = pheno_results.fetchone()
+        if trait_info:
+            trait_dict = {
+                'id'       : trait_info[0],
+                'locus'    : trait_info[1],
+                'lrs'      : trait_info[2],
+                'additive' : trait_info[3]
+            }
+
+            return flask.jsonify(trait_dict)
+        else:
+            return render_template("/api/no_results.html")
+
+@app.route("/api/v_{}/correlation".format(version), methods=('GET',))
+def get_corr_results():
+    results = correlation.do_correlation(request.args)
+
+    if len(results) > 0:
+        return flask.jsonify(results) #ZS: I think flask.jsonify expects a dict/list instead of JSON
+    else:
+        return render_template("/api/no_results.html")
+
+@app.route("/api/v_{}/mapping".format(version), methods=('GET',))
+def get_mapping_results():
+    results = mapping.do_mapping_for_api(request.args)
+
+    if len(results) > 0:
+        filename = "mapping_" + datetime.datetime.utcnow().strftime('%b_%d_%Y_%I:%M%p') + ".csv"
+
+        si = StringIO.StringIO()
+        csv_writer = csv.writer(si)
+        csv_writer.writerows(results)
+        output = make_response(si.getvalue())
+        output.headers["Content-Disposition"] = "attachment; filename=" + filename
+        output.headers["Content-type"] = "text/csv"
+
+        return output
+    else:
+        return render_template("/api/no_results.html")
+
+@app.route("/api/v_{}/genotypes/<path:group_name>".format(version))
+@app.route("/api/v_{}/genotypes/<path:group_name>.<path:file_format>".format(version))
+def get_genotypes(group_name, file_format="csv"):
+    si = StringIO.StringIO()
+    if file_format == "csv" or file_format == "geno":
+        filename = group_name + ".geno"
+
+        if os.path.isfile('{0}/{1}.geno'.format(flat_files('genotype'), group_name)):
+            output_lines = []
+            with open('{0}/{1}.geno'.format(flat_files('genotype'), group_name)) as genofile:
+                for line in genofile:
+                    if line[0] == "#" or line[0] == "@":
+                        output_lines.append([line.strip()])
+                    else:
+                        output_lines.append(line.split())
+
+            csv_writer = csv.writer(si, delimiter = '\t', escapechar = "\\", quoting = csv.QUOTE_NONE)
+        else:
+            return render_template("/api/no_results.html")
+    else:
+        filename = group_name + ".bimbam"
+
+        if os.path.isfile('{0}/{1}.geno'.format(flat_files('genotype'), group_name)):
+            output_lines = []
+            with open('{0}/{1}_geno.txt'.format(flat_files('genotype/bimbam'), group_name)) as genofile:
+                for line in genofile:
+                    output_lines.append([line.strip() for line in line.split(",")])
+
+            csv_writer = csv.writer(si, delimiter = ',')
+        else:
+            return render_template("/api/no_results.html")
+
+    csv_writer.writerows(output_lines)
+    output = make_response(si.getvalue())
+    output.headers["Content-Disposition"] = "attachment; filename=" + filename
+    output.headers["Content-type"] = "text/csv"
+
+    return output
+
+@app.route("/api/v_{}/traits/<path:dataset_name>".format(version), methods=('GET',))
+@app.route("/api/v_{}/traits/<path:dataset_name>.<path:file_format>".format(version), methods=('GET',))
+def get_traits(dataset_name, file_format = "json"):
+    #ZS: Need to check about the "start" and "stop" stuff since it seems to just limit the number of results to stop - start + 1 in Pjotr's elixir code
+
+    NotImplemented
+
+def get_dataset_trait_ids(dataset_name):
+    if "Geno" in dataset_name:
+        data_type = "Geno" #ZS: Need to pass back the dataset type
+        query =    """
+                            SELECT
+                                GenoXRef.GenoId, Geno.Name, GenoXRef.GenoFreezeId
+                            FROM
+                                Geno, GenoXRef, GenoFreeze
+                            WHERE
+                                Geno.Id = GenoXRef.GenoId AND
+                                GenoXRef.GenoFreezeId = GenoFreeze.Id AND
+                                GenoFreeze.Name = '{0}'
+                        """.format(dataset_name)
+
+        results = g.db.execute(query).fetchall()
+
+        trait_ids = [result[0] for result in results]
+        trait_names = [result[1] for result in results]
+        dataset_id = results[0][2]
+        return trait_ids, trait_names, data_type, dataset_id
+
+    elif "Publish" in dataset_name:
+        data_type = "Publish"
+        dataset_name = dataset_name.replace("Publish", "")
+        dataset_id = get_group_id(dataset_name)
+        
+        query = """
+                         SELECT
+                             PublishXRef.PhenotypeId
+                         FROM
+                             PublishXRef
+                         WHERE
+                             PublishXRef.InbredSetId = '{0}'
+                      """.format(dataset_id)
+
+        results = g.db.execute(query).fetchall()
+
+        trait_ids = [result[0] for result in results]
+        trait_names = trait_ids
+        return trait_ids, trait_names, data_type, dataset_id
+
+    else:
+        data_type = "ProbeSet"
+        query = """
+                        SELECT
+                            ProbeSetXRef.ProbeSetId, ProbeSet.Name, ProbeSetXRef.ProbeSetFreezeId
+                        FROM
+                            ProbeSet, ProbeSetXRef, ProbeSetFreeze
+                        WHERE
+                            ProbeSet.Id = ProbeSetXRef.ProbeSetId AND
+                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                            ProbeSetFreeze.Name = '{0}'
+                     """.format(dataset_name)
+
+        results = g.db.execute(query).fetchall()
+
+        trait_ids = [result[0] for result in results]
+        trait_names = [result[1] for result in results]
+        dataset_id = results[0][2]
+        return trait_ids, trait_names, data_type, dataset_id
+
+def get_samplelist(dataset_name):
+    group_id = get_group_id_from_dataset(dataset_name)
+
+    query = """
+               SELECT Strain.Name
+               FROM Strain, StrainXRef
+               WHERE StrainXRef.StrainId = Strain.Id AND
+                     StrainXRef.InbredSetId = {}
+            """.format(group_id)
+    
+    results = g.db.execute(query).fetchall()
+    
+    samplelist = [result[0] for result in results]
+
+    return samplelist
+
+def get_group_id_from_dataset(dataset_name):
+    if "Publish" in dataset_name:
+        query = """
+                    SELECT
+                            InbredSet.Id
+                    FROM
+                            InbredSet, PublishFreeze
+                    WHERE
+                            PublishFreeze.InbredSetId = InbredSet.Id AND
+                            PublishFreeze.Name = "{}"
+                """.format(dataset_name)
+    elif "Geno" in dataset_name:
+        query = """
+                    SELECT
+                            InbredSet.Id
+                    FROM
+                            InbredSet, GenoFreeze
+                    WHERE
+                            GenoFreeze.InbredSetId = InbredSet.Id AND
+                            GenoFreeze.Name = "{}"
+                """.format(dataset_name)
+    else:
+        query = """
+                    SELECT
+                            InbredSet.Id
+                    FROM
+                            InbredSet, ProbeSetFreeze, ProbeFreeze
+                    WHERE
+                            ProbeFreeze.InbredSetId = InbredSet.Id AND
+                            ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND
+                            ProbeSetFreeze.Name = "{}"
+                """.format(dataset_name)
+
+    result = g.db.execute(query).fetchone()
+
+    return result[0]
+
+def get_group_id(group_name):
+    query = """
+               SELECT InbredSet.Id
+               FROM InbredSet
+               WHERE InbredSet.Name = '{}'
+            """.format(group_name)
+
+    group_id = g.db.execute(query).fetchone()
+    if group_id:
+        return group_id[0]
+    else:
+        return None
\ No newline at end of file