about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--gn3/api/correlation.py12
-rw-r--r--gn3/api/datasets.py44
-rw-r--r--gn3/api/traits.py53
-rw-r--r--gn3/app.py4
-rw-r--r--gn3/computations/correlations.py13
-rw-r--r--gn3/computations/datasets.py323
-rw-r--r--gn3/computations/traits.py56
-rw-r--r--gn3/experimental_db.py11
-rw-r--r--gn3/settings.py3
-rw-r--r--tests/integration/test_datasets.py41
-rw-r--r--tests/integration/test_traits.py72
-rw-r--r--tests/unit/computations/test_correlation.py74
-rw-r--r--tests/unit/computations/test_datasets.py219
-rw-r--r--tests/unit/computations/test_trait.py84
14 files changed, 966 insertions, 43 deletions
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index 2339088..f28e1f5 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -33,9 +33,10 @@ def compute_sample_integration(corr_method="pearson"):
 
 @correlation.route("/sample_r/<string:corr_method>", methods=["POST"])
 def compute_sample_r(corr_method="pearson"):
-    """correlation endpoint for computing sample r correlations\
+    """Correlation endpoint for computing sample r correlations\
     api expects the trait data with has the trait and also the\
-    target_dataset  data"""
+    target_dataset  data
+    """
     correlation_input = request.get_json()
 
     # xtodo move code below to compute_all_sampl correlation
@@ -53,9 +54,10 @@ def compute_sample_r(corr_method="pearson"):
 
 @correlation.route("/lit_corr/<string:species>/<int:gene_id>", methods=["POST"])
 def compute_lit_corr(species=None, gene_id=None):
-    """api endpoint for doing lit correlation.results for lit correlation\
+    """Api endpoint for doing lit correlation.results for lit correlation\
     are fetched from the database this is the only case where the db\
-    might be needed for actual computing of the correlation results"""
+    might be needed for actual computing of the correlation results
+    """
 
     conn, _cursor_object = database_connector()
     target_traits_gene_ids = request.get_json()
@@ -72,7 +74,7 @@ def compute_lit_corr(species=None, gene_id=None):
 
 @correlation.route("/tissue_corr/<string:corr_method>", methods=["POST"])
 def compute_tissue_corr(corr_method="pearson"):
-    """api endpoint fr doing tissue correlation"""
+    """Api endpoint for doing tissue correlation"""
     tissue_input_data = request.get_json()
     primary_tissue_dict = tissue_input_data["primary_tissue"]
     target_tissues_dict = tissue_input_data["target_tissues_dict"]
diff --git a/gn3/api/datasets.py b/gn3/api/datasets.py
new file mode 100644
index 0000000..7f08de5
--- /dev/null
+++ b/gn3/api/datasets.py
@@ -0,0 +1,44 @@
+"""This module contains code for creating datasets"""
+from flask import Blueprint
+from flask import jsonify
+
+from gn3.computations.datasets import create_dataset
+from gn3.computations.datasets import get_traits_data
+from gn3.experimental_db import database_connector
+
+
+dataset = Blueprint("dataset", __name__)
+
+
+@dataset.route("/create/<dataset_name>/")
+@dataset.route("/create/<dataset_name>/<dataset_type>")
+def create_dataset_api(dataset_name, dataset_type=None):
+    """Endpoint of creating dataset"""
+
+    new_dataset = create_dataset(
+        dataset_type=dataset_type, dataset_name=dataset_name)
+
+    results = {
+        "dataset": new_dataset
+    }
+    return jsonify(results)
+
+
+@dataset.route("/fetch_traits_data/<dataset_name>/<dataset_type>")
+def fetch_traits_data(dataset_name, dataset_type):
+    """Endpoint for fetching Trait data"""
+    # should fetch this(temp)
+    trait_sample_ids = [4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15,
+                        17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31,
+                        35, 36, 37, 39, 98, 99, 100, 103, 487, 105, 106, 110, 115,
+                        116, 117, 118, 119, 120, 919, 147,
+                        121, 40, 41, 124, 125, 128, 135, 129, 130, 131,
+                        132, 134, 138, 139, 140, 141, 142, 144,
+                        145, 148, 149, 920, 922, 2, 3, 1, 1100]
+
+    conn, _cursor = database_connector()
+    results = get_traits_data(sample_ids=trait_sample_ids, database_instance=conn,
+                              dataset_name=dataset_name, dataset_type=dataset_type)
+    conn.close()
+
+    return jsonify({"results": results})
diff --git a/gn3/api/traits.py b/gn3/api/traits.py
new file mode 100644
index 0000000..0ac437d
--- /dev/null
+++ b/gn3/api/traits.py
@@ -0,0 +1,53 @@
+"""This module contains all the endpoints for traits"""
+from unittest import mock
+
+from flask import Blueprint
+from flask import jsonify
+from flask import request
+
+from gn3.computations.traits import fetch_trait
+from gn3.computations.traits import get_trait_info_data
+from gn3.experimental_db import database_connector
+
+trait = Blueprint("trait", __name__)
+
+
+@trait.route("/<string:trait_name>/<string:dataset_name>")
+def create_trait(trait_name, dataset_name):
+    """Endpoint for creating trait and fetching strain\
+    values"""
+
+    # xtodo replace the object at most this endpoint
+    # requires dataset_type,dataset_name ,dataset_id
+    trait_dataset = {
+        "name": dataset_name,
+        "id": 12,
+        "type": "ProbeSet"  # temp values
+    }
+    conn, _cursor = database_connector()
+
+    trait_results = fetch_trait(dataset=trait_dataset,
+                                trait_name=trait_name,
+                                database=conn)
+
+    conn.close()
+
+    return jsonify(trait_results)
+
+
+@trait.route("/trait_info/<string:trait_name>", methods=["POST"])
+def fetch_trait_info(trait_name):
+    """Api endpoint for fetching the trait info \
+    expects the trait and trait dataset to have\
+    been created """
+    data = request.get_json()
+
+    trait_dataset = data["trait_dataset"]
+    trait_data = data["trait"]
+    _trait_name = trait_name  # should be used as key to return results
+
+    database_instance = mock.Mock()
+
+    results = get_trait_info_data(trait_dataset, trait_data, database_instance)
+
+    return jsonify(results)
diff --git a/gn3/app.py b/gn3/app.py
index a684d25..f0f35f9 100644
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -7,6 +7,8 @@ from flask import Flask
 from gn3.api.gemma import gemma
 from gn3.api.general import general
 from gn3.api.correlation import correlation
+from gn3.api.traits import trait
+from gn3.api.datasets import dataset
 
 
 def create_app(config: Union[Dict, str, None] = None) -> Flask:
@@ -28,4 +30,6 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask:
     app.register_blueprint(general, url_prefix="/api/")
     app.register_blueprint(gemma, url_prefix="/api/gemma")
     app.register_blueprint(correlation, url_prefix="/api/correlation")
+    app.register_blueprint(trait, url_prefix="/api/trait")
+    app.register_blueprint(dataset, url_prefix="/api/dataset")
     return app
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index 26b7294..7fb67be 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -109,10 +109,9 @@ package :not packaged in guix
 
 def filter_shared_sample_keys(this_samplelist,
                               target_samplelist) -> Tuple[List, List]:
-    """Given primary and target samplelist for two base and target trait select
-filter the values using the shared keys
-
-    """
+    """Given primary and target samplelist\
+    for two base and target trait select\
+    filter the values using the shared keys"""
     this_vals = []
     target_vals = []
     for key, value in target_samplelist.items():
@@ -125,8 +124,9 @@ filter the values using the shared keys
 def compute_all_sample_correlation(this_trait,
                                    target_dataset,
                                    corr_method="pearson") -> List:
-    """Given a trait data samplelist and target__datasets compute all sample
-correlation"""
+    """Given a trait data samplelist and\
+    target__datasets compute all sample correlation
+    """
 
     this_trait_samples = this_trait["trait_sample_data"]
 
@@ -323,7 +323,6 @@ def compute_all_lit_correlation(conn, trait_lists: List,
                                 species: str, gene_id):
     """Function that acts as an abstraction for
     lit_correlation_for_trait_list"""
-    # xtodo to be refactored
 
     lit_results = lit_correlation_for_trait_list(
         conn=conn,
diff --git a/gn3/computations/datasets.py b/gn3/computations/datasets.py
new file mode 100644
index 0000000..57e1fe1
--- /dev/null
+++ b/gn3/computations/datasets.py
@@ -0,0 +1,323 @@
+"""module contains the code all related to datasets"""
+import json
+from math import ceil
+from collections import defaultdict
+
+from typing import Optional
+from typing import List
+
+from dataclasses import dataclass
+from MySQLdb import escape_string  # type: ignore
+
+import requests
+from gn3.settings import GN2_BASE_URL
+
+
+def retrieve_trait_sample_data(dataset,
+                               trait_name: str,
+                               database,
+                               group_species_id=None) -> List:
+    """given the dataset id and trait_name fetch the\
+    sample_name,value from the dataset"""
+
+    # should pass the db as arg all  do a setup
+
+    (dataset_name, dataset_id, dataset_type) = (dataset.get("name"), dataset.get(
+        "id"), dataset.get("type"))
+
+    dataset_query = get_query_for_dataset_sample(dataset_type)
+    results = []
+    sample_query_values = {
+        "Publish": (trait_name, dataset_id),
+        "Geno": (group_species_id, trait_name, dataset_name),
+        "ProbeSet": (trait_name, dataset_name)
+    }
+
+    if dataset_query:
+        formatted_query = dataset_query % sample_query_values[dataset_type]
+
+        results = fetch_from_db_sample_data(formatted_query, database)
+
+    return results
+
+
+def fetch_from_db_sample_data(formatted_query: str, database_instance) -> List:
+    """this is the function that does the actual fetching of\
+    results from the database"""
+    try:
+        cursor = database_instance.cursor()
+        cursor.execute(formatted_query)
+        results = cursor.fetchall()
+
+    except Exception as error:
+        raise error
+
+    cursor.close()
+
+    return results
+
+
+def get_query_for_dataset_sample(dataset_type) -> Optional[str]:
+    """This function contains queries for\
+    getting sample data from the db depending on the
+    dataset"""
+    dataset_query = {}
+
+    pheno_query = """
+                SELECT
+                        Strain.Name, PublishData.value, PublishSE.error,NStrain.count, Strain.Name2
+                FROM
+                        (PublishData, Strain, PublishXRef, PublishFreeze)
+                left join PublishSE on
+                        (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
+                left join NStrain on
+                        (NStrain.DataId = PublishData.Id AND
+                        NStrain.StrainId = PublishData.StrainId)
+                WHERE
+                        PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+                        PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
+                        PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id
+                Order BY
+                        Strain.Name
+                """
+    geno_query = """
+                SELECT
+                        Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2
+                FROM
+                        (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
+                left join GenoSE on
+                        (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
+                WHERE
+                        Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND
+                        GenoXRef.GenoFreezeId = GenoFreeze.Id AND
+                        GenoFreeze.Name = %s AND
+                        GenoXRef.DataId = GenoData.Id AND
+                        GenoData.StrainId = Strain.Id
+                Order BY
+                        Strain.Name
+                """
+
+    probeset_query = """
+                SELECT
+                        Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
+                FROM
+                        (ProbeSetData, ProbeSetFreeze,
+                         Strain, ProbeSet, ProbeSetXRef)
+                left join ProbeSetSE on
+                        (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
+                left join NStrain on
+                        (NStrain.DataId = ProbeSetData.Id AND
+                        NStrain.StrainId = ProbeSetData.StrainId)
+                WHERE
+                        ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                        ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                        ProbeSetFreeze.Name = '%s' AND
+                        ProbeSetXRef.DataId = ProbeSetData.Id AND
+                        ProbeSetData.StrainId = Strain.Id
+                Order BY
+                        Strain.Name
+                """
+
+    dataset_query["Publish"] = pheno_query
+    dataset_query["Geno"] = geno_query
+    dataset_query["ProbeSet"] = probeset_query
+
+    return dataset_query.get(dataset_type)
+
+
+@dataclass
+class Dataset:
+    """class for creating datasets"""
+    name: Optional[str] = None
+    dataset_type: Optional[str] = None
+    dataset_id: int = -1
+
+
+def create_mrna_tissue_dataset(dataset_name, dataset_type):
+    """An mRNA assay is a quantitative assessment (assay) associated\
+    with an mRNA trait. This used to be called probeset, but that term\
+    only refers specifically to the Affymetrix platform and is\
+    far too specific"""
+
+    return Dataset(name=dataset_name, dataset_type=dataset_type)
+
+
+def dataset_type_getter(dataset_name, redis_instance=None) -> Optional[str]:
+    """given the dataset name fetch the type\
+    of the dataset this in turn  enables fetching\
+    the creation of the correct object could utilize\
+    redis for the case"""
+
+    results = redis_instance.get(dataset_name, None)
+
+    if results:
+        return results
+
+    return fetch_dataset_type_from_gn2_api(dataset_name)
+
+
+def fetch_dataset_type_from_gn2_api(dataset_name):
+    """This function is only called when\
+    redis is empty and does not have the specified\
+    dataset_type"""
+    # should only run once
+
+    dataset_structure = {}
+
+    map_dataset_to_new_type = {
+        "Phenotypes": "Publish",
+        "Genotypes": "Geno",
+        "MrnaTypes": "ProbeSet"
+    }
+
+    data = json.loads(requests.get(
+        GN2_BASE_URL + "/api/v_pre1/gen_dropdown", timeout=5).content)
+    _name = dataset_name
+    for species in data['datasets']:
+        for group in data['datasets'][species]:
+            for dataset_type in data['datasets'][species][group]:
+                for dataset in data['datasets'][species][group][dataset_type]:
+                    # assumes  the first is dataset_short_name
+                    short_dataset_name = next(
+                        item for item in dataset if item != "None" and item is not None)
+
+                    dataset_structure[short_dataset_name] = map_dataset_to_new_type.get(
+                        dataset_type, "MrnaTypes")
+    return dataset_structure
+
+
+def dataset_creator_store(dataset_type):
+    """function contains key value pairs for\
+    the function need to be called to create\
+    each dataset_type"""
+
+    dataset_obj = {
+        "ProbeSet": create_mrna_tissue_dataset
+    }
+
+    return dataset_obj[dataset_type]
+
+
+def create_dataset(dataset_type=None, dataset_name: str = None):
+    """function for creating new dataset  temp not implemented"""
+    if dataset_type is None:
+        dataset_type = dataset_type_getter(dataset_name)
+
+    dataset_creator = dataset_creator_store(dataset_type)
+    results = dataset_creator(
+        dataset_name=dataset_name, dataset_type=dataset_type)
+    return results
+
+
+def fetch_dataset_sample_id(samplelist: List, database, species: str) -> dict:
+    """fetch the strain ids from the db only if\
+    it is in the samplelist"""
+    # xtodo create an in clause for samplelist
+
+    strain_query = """
+        SELECT Strain.Name, Strain.Id FROM Strain, Species
+        WHERE Strain.Name IN {}
+        and Strain.SpeciesId=Species.Id
+        and Species.name = '{}'
+        """
+
+    database_cursor = database.cursor()
+    database_cursor.execute(strain_query.format(samplelist, species))
+
+    results = database_cursor.fetchall()
+
+    return dict(results)
+
+
+def divide_into_chunks(the_list, number_chunks):
+    """Divides a list into approximately number_chunks
+    >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3)
+    [[1, 2, 7], [3, 22, 8], [5, 22, 333]]"""
+
+    length = len(the_list)
+    if length == 0:
+        return [[]]
+
+    if length <= number_chunks:
+        number_chunks = length
+    chunk_size = int(ceil(length/number_chunks))
+    chunks = []
+
+    for counter in range(0, length, chunk_size):
+        chunks.append(the_list[counter:counter+chunk_size])
+    return chunks
+
+
+def escape(string_):
+    """function escape sql value"""
+    return escape_string(string_).decode('utf8')
+
+
+def mescape(*items) -> List:
+    """multiple escape for query values"""
+
+    return [escape_string(str(item)).decode('utf8') for item in items]
+
+
+def get_traits_data(sample_ids, database_instance, dataset_name, dataset_type):
+    """function to fetch trait data"""
+    # MySQL limits the number of tables that can be used in a join to 61,
+    # so we break the sample ids into smaller chunks
+    # Postgres doesn't have that limit, so we can get rid of this after we transition
+
+    _trait_data = defaultdict(list)
+    chunk_size = 61
+    number_chunks = int(ceil(len(sample_ids) / chunk_size))
+    for sample_ids_step in divide_into_chunks(sample_ids, number_chunks):
+        if dataset_type == "Publish":
+            full_dataset_type = "Phenotype"
+        else:
+            full_dataset_type = dataset_type
+        temp = ['T%s.value' % item for item in sample_ids_step]
+
+        if dataset_type == "Publish":
+            query = "SELECT {}XRef.Id,".format(escape(dataset_type))
+
+        else:
+            query = "SELECT {}.Name,".format(escape(full_dataset_type))
+
+        query += ', '.join(temp)
+        query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(full_dataset_type,
+                                                                 dataset_type,
+                                                                 dataset_type))
+        for item in sample_ids_step:
+
+            query += """
+                    left join {}Data as T{} on T{}.Id = {}XRef.DataId
+                    and T{}.StrainId={}\n
+                    """.format(*mescape(dataset_type, item,
+                                        item, dataset_type, item, item))
+
+        if dataset_type == "Publish":
+            query += """
+                        WHERE {}XRef.{}FreezeId = {}Freeze.Id
+                        and {}Freeze.Name = '{}'
+                        and {}.Id = {}XRef.{}Id
+                        order by {}.Id
+                        """.format(*mescape(dataset_type, dataset_type,
+                                            dataset_type, dataset_type,
+                                            dataset_name, full_dataset_type,
+                                            dataset_type, dataset_type,
+                                            full_dataset_type))
+
+        else:
+            query += """
+                        WHERE {}XRef.{}FreezeId = {}Freeze.Id
+                        and {}Freeze.Name = '{}'
+                        and {}.Id = {}XRef.{}Id
+                        order by {}.Id
+                        """.format(*mescape(dataset_type, dataset_type,
+                                            dataset_type, dataset_type,
+                                            dataset_name, dataset_type,
+                                            dataset_type, dataset_type,
+                                            full_dataset_type))
+
+        # print(query)
+
+        _results = fetch_from_db_sample_data(query, database_instance)
+    return {}
diff --git a/gn3/computations/traits.py b/gn3/computations/traits.py
new file mode 100644
index 0000000..1aa2970
--- /dev/null
+++ b/gn3/computations/traits.py
@@ -0,0 +1,56 @@
+"""Module contains all operations related to traits"""
+from gn3.computations.datasets import retrieve_trait_sample_data
+
+
+def fetch_trait(dataset, trait_name: str, database) -> dict:
+    """this method creates a trait by\
+    fetching required data given the\
+    dataset and trait_name"""
+
+    created_trait = {
+        "dataset": dataset,
+        "trait_name": trait_name
+    }
+
+    trait_data = get_trait_sample_data(dataset, trait_name, database)
+
+    created_trait["trait_data"] = trait_data
+
+    return created_trait
+
+
+def get_trait_sample_data(trait_dataset, trait_name, database) -> dict:
+    """First try to fetch the trait's sample data from redis; if that\
+    fails, fetch from the trait's dataset. Redis is only used for the\
+    temp dataset type, which is not used in this case """
+
+    sample_results = retrieve_trait_sample_data(
+        trait_dataset, trait_name, database)
+
+    trait_data = {}
+
+    for (name, sample_value, _variance, _numcase, _name2) in sample_results:
+
+        trait_data[name] = sample_value
+    return trait_data
+
+
+def get_trait_info_data(trait_dataset,
+                        trait_name: str,
+                        database_instance,
+                        get_qtl_info: bool = False) -> dict:
+    """given a dataset and trait_name return a dict containing all info\
+    regarding the get trait"""
+
+    _temp_var_holder = (trait_dataset, trait_name,
+                        database_instance, get_qtl_info)
+    trait_info_data = {
+        "description": "",
+        "chr": "",
+        "locus": "",
+        "mb": "",
+        "abbreviation": "",
+        "trait_display_name": ""
+
+    }
+    return trait_info_data
diff --git a/gn3/experimental_db.py b/gn3/experimental_db.py
new file mode 100644
index 0000000..a07aeba
--- /dev/null
+++ b/gn3/experimental_db.py
@@ -0,0 +1,11 @@
+"""This module contains experimental db stuff"""
+from typing import Tuple
+import MySQLdb as mdb   # type: ignore
+
+
+def database_connector()->Tuple:
+    """function to create db connector"""
+    conn = mdb.connect("localhost", "kabui", "1234", "db_webqtl")
+    cursor = conn.cursor()
+
+    return (conn, cursor)
diff --git a/gn3/settings.py b/gn3/settings.py
index e77a977..478a041 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -15,3 +15,6 @@ TMPDIR = os.environ.get("TMPDIR", tempfile.gettempdir())
 SQL_URI = os.environ.get("SQL_URI", "mysql://kabui:1234@localhost/db_webqtl")
 SECRET_KEY = "password"
 SQLALCHEMY_TRACK_MODIFICATIONS = False
+# gn2 results only used in fetching dataset info
+
+GN2_BASE_URL = "http://www.genenetwork.org/"
diff --git a/tests/integration/test_datasets.py b/tests/integration/test_datasets.py
new file mode 100644
index 0000000..f97d970
--- /dev/null
+++ b/tests/integration/test_datasets.py
@@ -0,0 +1,41 @@
+"""This module contains integration tests for datasets"""
+from unittest import TestCase
+from unittest import mock
+
+from collections import namedtuple
+from gn3.app import create_app
+
+
+class DatasetIntegrationTests(TestCase):
+    """class contains integration tests for datasets"""
+
+    def setUp(self):
+        self.app = create_app().test_client()
+
+    @mock.patch("gn3.api.datasets.create_dataset")
+    def test_create_dataset(self, mock_dataset):
+        """Test for creating dataset object"""
+        mock_dataset_creator = namedtuple(
+            'ProbeSet', ["dataset_name", "dataset_type"])
+        new_dataset = mock_dataset_creator("HC_M2_0606_P", "ProbeSet")
+        mock_dataset.return_value = new_dataset
+        response = self.app.get(
+            "/api/dataset/create/HC_M2_0606_P/", follow_redirects=True)
+        mock_dataset.assert_called_once_with(
+            dataset_type=None, dataset_name="HC_M2_0606_P")
+        results = response.get_json()["dataset"]
+        self.assertEqual(results[1], "ProbeSet")
+        self.assertEqual(response.status_code, 200)
+
+    @mock.patch("gn3.api.datasets.get_traits_data")
+    @mock.patch("gn3.api.datasets.database_connector")
+    def test_fetch_traits_data(self, mock_db, mock_get_trait_data):
+        """Test api/dataset/fetch_traits_data/d_name/d_type"""
+
+        mock_get_trait_data.return_value = {}
+        mock_db.return_value = (mock.Mock(), mock.Mock())
+        response = self.app.get(
+            "/api/dataset/fetch_traits_data/Aging-Brain-UCIPublish/Publish", follow_redirects=True)
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.get_json(), {"results": {}})
diff --git a/tests/integration/test_traits.py b/tests/integration/test_traits.py
new file mode 100644
index 0000000..410ba22
--- /dev/null
+++ b/tests/integration/test_traits.py
@@ -0,0 +1,72 @@
+"""module contains integration tests for trait endpoints"""
+from unittest import TestCase
+from unittest import mock
+
+from gn3.app import create_app
+
+
+class TraitIntegrationTest(TestCase):
+    """class contains integration tests for\
+    traits"""
+
+    def setUp(self):
+        self.app = create_app().test_client()
+
+    @mock.patch("gn3.api.traits.fetch_trait")
+    @mock.patch("gn3.api.traits.database_connector")
+    def test_create_trait(self, mock_database, mock_fetch_trait):
+        """test the endpoint for creating traits\
+        endpoint requires trait name and dataset name"""
+        mock_database.return_value = (mock.Mock(), mock.Mock())
+        trait_results = {
+            "dataset": None,
+            "trait_name": "1449593_at",
+            "trait_data": {
+                "BXD11": 8.464,
+                "BXD12": 8.414,
+                "BXD13": 8.753,
+                "BXD15": 8.5,
+                "BXD16": 8.832
+            }
+
+        }
+        mock_fetch_trait.return_value = trait_results
+
+        results = self.app.get(
+            "/api/trait/1449593_at/HC_M2_0606_P", follow_redirects=True)
+
+        trait_data = results.get_json()
+
+        self.assertEqual(mock_database.call_count, 1)
+        self.assertEqual(results.status_code, 200)
+        self.assertEqual(trait_data, trait_results)
+
+    @mock.patch("gn3.api.traits.get_trait_info_data")
+    def test_retrieve_trait_info(self, mock_get_trait_info):
+        """integration test for endpoints for retrieving\
+        trait info expects the dataset of trait to have been
+        created"""
+
+        trait_post_data = {
+            "trait": {"trait_name": ""},
+            "trait_dataset": {"dataset_name": ""}
+        }
+
+        expected_api_results = {
+            "description": "trait description",
+            "chr": "",
+            "locus": "",
+            "mb": "",
+            "abbreviation": "trait_abbreviation",
+            "trait_display_name": "trait_name"
+
+        }
+        mock_get_trait_info.return_value = expected_api_results
+
+        trait_info = self.app.post(
+            "/api/trait/trait_info/144_at", json=trait_post_data, follow_redirects=True)
+
+        trait_info_results = trait_info.get_json()
+
+        self.assertEqual(trait_info.status_code, 200)
+        self.assertEqual(trait_info_results, expected_api_results)
diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py
index 52d1f60..8f3ef25 100644
--- a/tests/unit/computations/test_correlation.py
+++ b/tests/unit/computations/test_correlation.py
@@ -1,4 +1,4 @@
-"""module contains the tests for correlation"""
+"""Module contains the tests for correlation"""
 import unittest
 from unittest import TestCase
 from unittest import mock
@@ -88,10 +88,10 @@ class DataBase(QueryableMixin):
 
 
 class TestCorrelation(TestCase):
-    """class for testing correlation functions"""
+    """Class for testing correlation functions"""
 
     def test_normalize_values(self):
-        """function to test normalizing values """
+        """Function to test normalizing values """
         results = normalize_values([2.3, None, None, 3.2, 4.1, 5],
                                    [3.4, 7.2, 1.3, None, 6.2, 4.1])
 
@@ -100,7 +100,7 @@ class TestCorrelation(TestCase):
         self.assertEqual(results, expected_results)
 
     def test_bicor(self):
-        """test for doing biweight mid correlation """
+        """Test for doing biweight mid correlation """
 
         results = do_bicor(x_val=[1, 2, 3], y_val=[4, 5, 6])
 
@@ -110,8 +110,9 @@ class TestCorrelation(TestCase):
     @mock.patch("gn3.computations.correlations.compute_corr_coeff_p_value")
     @mock.patch("gn3.computations.correlations.normalize_values")
     def test_compute_sample_r_correlation(self, norm_vals, compute_corr):
-        """test for doing sample correlation gets the cor\
-        and p value and rho value using pearson correlation"""
+        """Test for doing sample correlation gets the cor\
+        and p value and rho value using pearson correlation
+        """
         primary_values = [2.3, 4.1, 5]
         target_values = [3.4, 6.2, 4.1]
 
@@ -141,7 +142,7 @@ class TestCorrelation(TestCase):
             spearman_results, tuple, "message")
 
     def test_filter_shared_sample_keys(self):
-        """function to  tests shared key between two dicts"""
+        """Function to  tests shared key between two dicts"""
 
         this_samplelist = {
             "C57BL/6J": "6.638",
@@ -170,7 +171,7 @@ class TestCorrelation(TestCase):
     @mock.patch("gn3.computations.correlations.compute_sample_r_correlation")
     @mock.patch("gn3.computations.correlations.filter_shared_sample_keys")
     def test_compute_all_sample(self, filter_shared_samples, sample_r_corr):
-        """given target dataset compute all sample r correlation"""
+        """Given target dataset compute all sample r correlation"""
 
         filter_shared_samples.return_value = (["1.23", "6.565", "6.456"], [
             "6.266", "6.565", "6.456"])
@@ -200,7 +201,6 @@ class TestCorrelation(TestCase):
         sample_all_results = [{"1419792_at": {"corr_coeffient": -1.0,
                                               "p_value": 0.9,
                                               "num_overlap": 6}}]
-        # ?corr_method: str, trait_vals, target_samples_vals
 
         self.assertEqual(compute_all_sample_correlation(
             this_trait=this_trait_data, target_dataset=traits_dataset), sample_all_results)
@@ -212,9 +212,10 @@ class TestCorrelation(TestCase):
 
     @unittest.skip("not implemented")
     def test_tissue_lit_corr_for_probe_type(self):
-        """tests for doing tissue and lit correlation for  trait list\
+        """Tests for doing tissue and lit correlation for  trait list\
         if both the dataset and target dataset are probeset runs\
-        on after initial correlation has been done"""
+        on after initial correlation has been done
+        """
 
         results = tissue_lit_corr_for_probe_type(
             corr_type="tissue", top_corr_results={})
@@ -223,8 +224,9 @@ class TestCorrelation(TestCase):
 
     @mock.patch("gn3.computations.correlations.compute_corr_coeff_p_value")
     def test_tissue_correlation_for_trait_list(self, mock_compute_corr_coeff):
-        """test given a primary tissue values for a trait  and and a list of\
-        target tissues for traits  do the tissue correlation for them"""
+        """Test given primary tissue values for a trait and a list of\
+        target tissues for traits, do the tissue correlation for them
+        """
 
         primary_tissue_values = [1.1, 1.5, 2.3]
         target_tissues_values = [1, 2, 3]
@@ -241,8 +243,9 @@ class TestCorrelation(TestCase):
     @mock.patch("gn3.computations.correlations.fetch_lit_correlation_data")
     @mock.patch("gn3.computations.correlations.map_to_mouse_gene_id")
     def test_lit_correlation_for_trait_list(self, mock_mouse_gene_id, fetch_lit_data):
-        """fetch results from  db call for lit correlation given a trait list\
-        after doing correlation"""
+        """Fetch results from db call for lit correlation given a trait list\
+        after doing correlation
+        """
 
         target_trait_lists = [("1426679_at", 15),
                               ("1426702_at", 17),
@@ -265,8 +268,9 @@ class TestCorrelation(TestCase):
         self.assertEqual(lit_results, expected_results)
 
     def test_fetch_lit_correlation_data(self):
-        """test for fetching lit correlation data from\
-        the database where the input and mouse geneid are none"""
+        """Test for fetching lit correlation data from\
+        the database where the input and mouse geneid are none
+        """
 
         conn = DataBase()
         results = fetch_lit_correlation_data(conn=conn,
@@ -277,8 +281,9 @@ class TestCorrelation(TestCase):
         self.assertEqual(results, ("1", 0))
 
     def test_fetch_lit_correlation_data_db_query(self):
-        """test for fetching lit corr coefficent givent the input\
-         input trait mouse gene id and mouse gene id"""
+        """Test for fetching lit corr coefficient given the\
+        input trait mouse gene id and mouse gene id
+        """
 
         expected_db_results = [namedtuple("lit_coeff", "val")(x*0.1)
                                for x in range(1, 4)]
@@ -293,9 +298,12 @@ class TestCorrelation(TestCase):
         self.assertEqual(expected_results, lit_results)
 
     def test_query_lit_correlation_for_db_empty(self):
-        """test that corr coeffient returned is 0 given the\
-        db value if corr coefficient is empty"""
-        database_instance = DataBase()
+        """Test that corr coefficient returned is 0 given the\
+        db value if corr coefficient is empty
+        """
+        database_instance = mock.Mock()
+        database_instance.execute.return_value.fetchone.return_value = None
+
         lit_results = fetch_lit_correlation_data(conn=database_instance,
                                                  input_mouse_gene_id="12",
                                                  gene_id="16",
@@ -304,8 +312,9 @@ class TestCorrelation(TestCase):
         self.assertEqual(lit_results, ("16", 0))
 
     def test_query_formatter(self):
-        """test for formatting a query given the query string and also the\
-        values"""
+        """Test for formatting a query given the query string and also the\
+        values
+        """
         query = """
         SELECT VALUE
         FROM  LCorr
@@ -330,16 +339,18 @@ class TestCorrelation(TestCase):
         self.assertEqual(formatted_query, expected_formatted_query)
 
     def test_query_formatter_no_query_values(self):
-        """test for formatting a query where there are no\
-        string placeholder"""
+        """Test for formatting a query where there are no\
+        string placeholder
+        """
         query = """SELECT * FROM  USERS"""
         formatted_query = query_formatter(query)
 
         self.assertEqual(formatted_query, query)
 
     def test_map_to_mouse_gene_id(self):
-        """test for converting a gene id to mouse geneid\
-        given a species which is not mouse"""
+        """Test for converting a gene id to mouse geneid\
+        given a species which is not mouse
+        """
         database_instance = mock.Mock()
         test_data = [("Human", 14), (None, 9), ("Mouse", 15), ("Rat", 14)]
 
@@ -361,9 +372,10 @@ class TestCorrelation(TestCase):
 
     @mock.patch("gn3.computations.correlations.lit_correlation_for_trait_list")
     def test_compute_all_lit_correlation(self, mock_lit_corr):
-        """test for compute all lit correlation which acts\
+        """Test for compute all lit correlation which acts\
         as an abstraction for lit_correlation_for_trait_list
-        and is used in the api/correlation/lit"""
+        and is used in the api/correlation/lit
+        """
 
         database = mock.Mock()
 
@@ -385,7 +397,7 @@ class TestCorrelation(TestCase):
     @mock.patch("gn3.computations.correlations.tissue_correlation_for_trait_list")
     @mock.patch("gn3.computations.correlations.process_trait_symbol_dict")
     def test_compute_all_tissue_correlation(self, process_trait_symbol, mock_tissue_corr):
-        """test for compute all tissue corelation which abstracts
+        """Test for compute all tissue correlation which abstracts
         api calling the tissue_correlation for trait_list"""
 
         primary_tissue_dict = {"trait_id": "1419792_at",
diff --git a/tests/unit/computations/test_datasets.py b/tests/unit/computations/test_datasets.py
new file mode 100644
index 0000000..f9e9c2b
--- /dev/null
+++ b/tests/unit/computations/test_datasets.py
@@ -0,0 +1,219 @@
+"""Module contains tests from datasets"""
+import json
+
+from unittest import TestCase
+from unittest import mock
+
+from collections import namedtuple
+
+from gn3.computations.datasets import retrieve_trait_sample_data
+from gn3.computations.datasets import get_query_for_dataset_sample
+from gn3.computations.datasets import fetch_from_db_sample_data
+from gn3.computations.datasets import create_dataset
+from gn3.computations.datasets import dataset_creator_store
+from gn3.computations.datasets import dataset_type_getter
+from gn3.computations.datasets import fetch_dataset_type_from_gn2_api
+from gn3.computations.datasets import fetch_dataset_sample_id
+from gn3.computations.datasets import divide_into_chunks
+from gn3.computations.datasets import get_traits_data
+
+
+class TestDatasets(TestCase):
+    """Class contains tests for datasets"""
+
+    @mock.patch("gn3.computations.datasets.fetch_from_db_sample_data")
+    def test_retrieve_trait_sample_data(self, mock_fetch_sample_results):
+        """Test retrieving sample data\
+         for trait from the dataset
+        """
+        trait_name = "1419792_at"
+        dataset_id = "HC_M2_0606_P&"
+        dataset_type = "Publish"
+
+        database = mock.Mock()
+
+        dataset = {
+            "id": dataset_id,
+            "type": dataset_type,
+            "name": dataset_id
+        }
+
+        fetch_results = [('BXD32', 8.001, None, None, 'BXD32')]
+
+        mock_fetch_sample_results.return_value = fetch_results
+
+        results = retrieve_trait_sample_data(
+            dataset, trait_name, database)
+        self.assertEqual(mock_fetch_sample_results.call_count, 1)
+        self.assertEqual(results, fetch_results)
+
+    def test_query_for_dataset_sample(self):
+        """Test for getting query for sample data"""
+
+        no_results = get_query_for_dataset_sample("does not exists")
+
+        query_exists = get_query_for_dataset_sample("Publish")
+
+        self.assertEqual(no_results, None)
+        self.assertIsInstance(query_exists, str)
+
+    def test_fetch_from_db_sample_data(self):
+        """Test for function that fetches sample\
+        results from the database
+        """
+
+        database_results = [('BXD31', 8.001, None, None, 'BXD31'),
+                            ('BXD32', 7.884, None, None, 'BXD32'),
+                            ('BXD42', 7.682, None, None, 'BXD42'),
+                            ('BXD42', 7.682, None, None, 'BXD42'),
+                            ('BXD40', 7.945, None, None, 'BXD40'),
+                            ('BXD43', 7.873, None, None, 'BXD43')
+                            ]
+
+        database = mock.Mock()
+        db_cursor = mock.Mock()
+        db_cursor.execute.return_value = 6
+        db_cursor.fetchall.return_value = database_results
+        database.cursor.return_value = db_cursor
+
+        mock_pheno_query = """
+                    SELECT
+                            Strain.Name, PublishData.value, PublishSE.error,NStrain.count, Strain.Name2
+                    WHERE
+                            PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+                            PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = 1419792_at AND
+                            PublishFreeze.Id = '12' AND PublishData.StrainId = Strain.Id
+                    Order BY
+                            Strain.Name
+                    """
+        fetch_results = fetch_from_db_sample_data(mock_pheno_query, database)
+
+        self.assertEqual(fetch_results, database_results)
+
+    @mock.patch("gn3.computations.datasets.dataset_creator_store")
+    @mock.patch("gn3.computations.datasets.dataset_type_getter")
+    def test_create_dataset(self, mock_dataset_type, mock_store):
+        """Test function that creates/fetches required dataset\
+        can either be published phenotype, genotype, microarray or\
+        user defined ->Temp
+        """
+        probe_name = "HC_M2_0606_P"
+        probe_type = "ProbeSet"
+
+        mock_dataset_creator = namedtuple(
+            'ProbeSet', ["dataset_name", "dataset_type"])
+
+        mock_store.return_value = mock_dataset_creator
+        mock_dataset_type.return_value = probe_type
+        dataset = create_dataset(
+            dataset_type=None, dataset_name=probe_name)
+
+        self.assertEqual(dataset.dataset_name, probe_name)
+        self.assertEqual(dataset.dataset_type, probe_type)
+
+    def test_dataset_creator_store(self):
+        """Test for the functions that actually
+        create the different \
+        datasets
+        """
+        results = dataset_creator_store("ProbeSet")
+
+        self.assertTrue(results)
+
+    def test_dataset_type_getter(self):
+        """Test for fetching type of dataset given\
+        the dataset name
+        """
+
+        redis_instance = mock.Mock()
+        # fetched  in redis
+        redis_instance.get.return_value = "ProbeSet"
+        results = dataset_type_getter("HC_M2_0_P", redis_instance)
+        self.assertEqual(results, "ProbeSet")
+
+    @mock.patch("gn3.computations.datasets.requests")
+    def test_fetch_dataset_type_from_gn2_api(self, mock_request):
+        """Test for the function that fetches\
+        all datasets from gn2 api in order to store\
+        in redis
+        """
+
+        expected_json_results = {"datasets": {
+            "arabidopsis": {
+                "BayXSha": {
+                    "Genotypes": [
+                        [
+                            "None",
+                            "BayXShaGeno",
+                            "BayXSha Genotypes"
+                        ]
+                    ],
+                    "Phenotypes": [
+                        [
+                            "642",
+                            "BayXShaPublish",
+                            "BayXSha Published Phenotypes"
+                        ]
+                    ]
+                }
+            }
+        }}
+
+        request_results = json.dumps(expected_json_results)
+        mock_request.get.return_value.content = request_results
+        results = fetch_dataset_type_from_gn2_api("HC_M2_0_P")
+        expected_results = {
+            "BayXShaGeno": "Geno",
+            "642": "Publish"
+        }
+
+        self.assertEqual(expected_results, results)
+
+    def test_fetch_dataset_sample_id(self):
+        """Get from the database the sample\
+        id only if it is in the samplelists
+        """
+
+        expected_results = {"B6D2F1": 1, "BXD1": 4, "BXD11": 10,
+                            "BXD12": 11, "BXD13": 12, "BXD15": 14, "BXD16": 15}
+
+        database_instance = mock.Mock()
+        database_cursor = mock.Mock()
+
+        database_cursor.execute.return_value = 5
+        database_cursor.fetchall.return_value = list(expected_results.items())
+        database_instance.cursor.return_value = database_cursor
+        strain_list = ["B6D2F1", "BXD1", "BXD11",
+                       "BXD12", "BXD13", "BXD16", "BXD15"]
+
+        results = fetch_dataset_sample_id(
+            samplelist=strain_list, database=database_instance, species="mouse")
+
+        self.assertEqual(results, expected_results)
+
+    @mock.patch("gn3.computations.datasets.fetch_from_db_sample_data")
+    @mock.patch("gn3.computations.datasets.divide_into_chunks")
+    def test_get_traits_data(self, mock_divide_into_chunks, mock_fetch_samples):
+        """Test for function to get data\
+        of traits in dataset
+        """
+        _expected_results = {'AT_DSAFDS': [
+            12, 14, 13, 23, 12, 14, 13, 23, 12, 14, 13, 23]}
+        database = mock.Mock()
+        sample_id = [1, 2, 7, 3, 22, 8]
+        mock_divide_into_chunks.return_value = [
+            [1, 2, 7], [3, 22, 8], [5, 22, 333]]
+        mock_fetch_samples.return_value = ("AT_DSAFDS", 12, 14, 13, 23)
+        results = get_traits_data(sample_id, database, "HC_M2", "Publish")
+
+        self.assertEqual({}, dict(results))
+
+    def test_divide_into_chunks(self):
+        """Test for dividing a list into a given number of\
+        chunks
+        """
+        results = divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3)
+
+        expected_results = [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
+
+        self.assertEqual(results, expected_results)
diff --git a/tests/unit/computations/test_trait.py b/tests/unit/computations/test_trait.py
new file mode 100644
index 0000000..feb97c6
--- /dev/null
+++ b/tests/unit/computations/test_trait.py
@@ -0,0 +1,84 @@
+"""Module contains tests for creating traits"""
+from unittest import TestCase
+from unittest import mock
+
+from gn3.computations.traits import fetch_trait
+from gn3.computations.traits import get_trait_sample_data
+from gn3.computations.traits import get_trait_info_data
+
+
+class TestTrait(TestCase):
+    """Class contains tests for creating traits"""
+
+    @mock.patch("gn3.computations.traits.get_trait_sample_data")
+    def test_fetch_trait(self, get_sample_data):
+        """Test for creating/fetching trait"""
+
+        expected_sample_data = {
+            "A/Y": 12.3,
+            "WQC": 11.1
+        }
+
+        database = mock.Mock()
+
+        get_sample_data.return_value = expected_sample_data
+
+        expected_trait = {
+            "trait_name": "AXFDSF_AT",
+            "dataset": None,
+            "trait_data": expected_sample_data
+        }
+        results = fetch_trait(dataset=None,
+                              trait_name="AXFDSF_AT",
+                              database=database)
+
+        self.assertEqual(results, expected_trait)
+        self.assertEqual(get_sample_data.call_count, 1)
+
+    @mock.patch("gn3.computations.traits.retrieve_trait_sample_data")
+    def test_get_trait_sample_data(self, mock_retrieve_sample_data):
+        """Test for getting sample data from either\
+        the trait's dataset or from redis
+        """
+
+        trait_dataset = mock.Mock()
+        dataset_trait_sample_data = [
+            ('129S1/SvImJ', 7.433, None, None, '129S1/SvImJ'),
+            ('A/J', 7.596, None, None, 'A/J'),
+            ('AKR/J', 7.774, None, None, 'AKR/J'),
+            ('B6D2F1', 7.707, None, None, 'B6D2F1')]
+        mock_retrieve_sample_data.return_value = dataset_trait_sample_data
+
+        trait_name = "1426679_at"
+
+        database = mock.Mock()
+
+        results = get_trait_sample_data(
+            trait_dataset, trait_name, database)
+
+        expected_results = {
+            "129S1/SvImJ": 7.433,
+            "A/J": 7.596,
+            "AKR/J": 7.774,
+            "B6D2F1": 7.707
+        }
+
+        self.assertEqual(results, expected_results)
+
+    def test_get_trait_info_data(self):
+        """Test for getting info data related\
+        to a trait
+        """
+
+        results = get_trait_info_data(
+            trait_name="AXSF_AT", trait_dataset=mock.Mock(), database_instance=None)
+        expected_trait_info = {
+            "description": "",
+            "trait_display_name": "",
+            "abbreviation": "",
+            "chr": "",
+            "mb": "",
+            "locus": ""
+        }
+
+        self.assertEqual(results, expected_trait_info)