1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
|
"""module contains tests from datasets"""
import json
from unittest import TestCase
from unittest import mock
from collections import namedtuple
from gn3.computations.datasets import retrieve_trait_sample_data
from gn3.computations.datasets import get_query_for_dataset_sample
from gn3.computations.datasets import fetch_from_db_sample_data
from gn3.computations.datasets import create_dataset
from gn3.computations.datasets import dataset_creator_store
from gn3.computations.datasets import dataset_type_getter
from gn3.computations.datasets import fetch_dataset_type_from_gn2_api
from gn3.computations.datasets import fetch_dataset_sample_id
from gn3.computations.datasets import divide_into_chunks
from gn3.computations.datasets import get_traits_data
class TestDatasets(TestCase):
    """Tests for the helper functions in ``gn3.computations.datasets``."""

    @mock.patch("gn3.computations.datasets.fetch_from_db_sample_data")
    def test_retrieve_trait_sample_data(self, mock_fetch_sample_results):
        """Test retrieving sample data for a trait from the dataset.

        The database fetch helper is patched, so this checks that it is
        called exactly once and that its rows come back unchanged.
        """
        trait_name = "1419792_at"
        dataset_id = "HC_M2_0606_P&"
        dataset_type = "Publish"
        dataset = {
            "id": dataset_id,
            "type": dataset_type,
            "name": dataset_id
        }
        fetch_results = [('BXD32', 8.001, None, None, 'BXD32')]
        mock_fetch_sample_results.return_value = fetch_results

        results = retrieve_trait_sample_data(
            dataset, trait_name)

        self.assertEqual(mock_fetch_sample_results.call_count, 1)
        self.assertEqual(results, fetch_results)

    def test_query_for_dataset_sample(self):
        """Test getting the query used to fetch sample data.

        An unknown dataset type gives ``None``; ``"Publish"`` gives a string.
        """
        no_results = get_query_for_dataset_sample("does not exists")
        query_exists = get_query_for_dataset_sample("Publish")

        self.assertIsNone(no_results)
        self.assertIsInstance(query_exists, str)

    def test_fetch_from_db_sample_data(self):
        """Test the function that fetches sample results from the database.

        The connection and cursor are mocks; the rows configured on the
        mocked cursor must be returned as they are.
        """
        database_results = [('BXD31', 8.001, None, None, 'BXD31'),
                            ('BXD32', 7.884, None, None, 'BXD32'),
                            ('BXD42', 7.682, None, None, 'BXD42'),
                            ('BXD42', 7.682, None, None, 'BXD42'),
                            ('BXD40', 7.945, None, None, 'BXD40'),
                            ('BXD43', 7.873, None, None, 'BXD43')
                            ]
        database = mock.Mock()
        db_cursor = mock.Mock()
        db_cursor.execute.return_value = 6
        db_cursor.fetchall.return_value = database_results
        database.cursor.return_value = db_cursor
        # Placeholder query: `database` is a mock, so no real database
        # ever sees this text.
        mock_pheno_query = """
SELECT
Strain.Name, PublishData.value, PublishSE.error,NStrain.count, Strain.Name2
WHERE
PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = 1419792_at AND
PublishFreeze.Id = '12' AND PublishData.StrainId = Strain.Id
Order BY
Strain.Name
"""

        fetch_results = fetch_from_db_sample_data(mock_pheno_query, database)

        self.assertEqual(fetch_results, database_results)

    # Stacked patches are applied bottom-up, so the innermost decorator
    # (dataset_type_getter) supplies the first mock argument.
    @mock.patch("gn3.computations.datasets.dataset_creator_store")
    @mock.patch("gn3.computations.datasets.dataset_type_getter")
    def test_create_dataset(self, mock_dataset_type, mock_store):
        """Test the function that creates/fetches the required dataset.

        The dataset can be a published phenotype, genotype, microarray or
        user defined (Temp) one. Both the type lookup and the creator store
        are patched here; the creator is a namedtuple class, so the returned
        object exposes the name and type it was built with.
        """
        probe_name = "HC_M2_0606_P"
        probe_type = "ProbeSet"
        mock_dataset_creator = namedtuple(
            'ProbeSet', ["dataset_name", "dataset_type"])
        mock_store.return_value = mock_dataset_creator
        mock_dataset_type.return_value = probe_type

        dataset = create_dataset(
            dataset_type=None, dataset_name=probe_name)

        self.assertEqual(dataset.dataset_name, probe_name)
        self.assertEqual(dataset.dataset_type, probe_type)

    def test_dataset_creator_store(self):
        """Test the lookup of the function that creates different datasets.

        Only checks that something truthy is returned for ``"ProbeSet"``.
        """
        results = dataset_creator_store("ProbeSet")

        self.assertTrue(results)

    def test_dataset_type_getter(self):
        """Test fetching the type of a dataset given the dataset name."""
        redis_instance = mock.Mock()
        # Case covered: the dataset type is found in redis.
        redis_instance.get.return_value = "ProbeSet"

        results = dataset_type_getter("HC_M2_0_P", redis_instance)

        self.assertEqual(results, "ProbeSet")

    @mock.patch("gn3.computations.datasets.requests")
    def test_fetch_dataset_type_from_gn2_api(self, mock_request):
        """Test fetching all datasets from the gn2 api to store in redis.

        ``requests`` is patched so that ``get(...).content`` is a JSON
        document; the result is compared with a flat mapping to a dataset
        type.
        """
        expected_json_results = {"datasets": {
            "arabidopsis": {
                "BayXSha": {
                    "Genotypes": [
                        [
                            "None",
                            "BayXShaGeno",
                            "BayXSha Genotypes"
                        ]
                    ],
                    "Phenotypes": [
                        [
                            "642",
                            "BayXShaPublish",
                            "BayXSha Published Phenotypes"
                        ]
                    ]
                }
            }
        }}
        request_results = json.dumps(expected_json_results)
        mock_request.get.return_value.content = request_results

        results = fetch_dataset_type_from_gn2_api("HC_M2_0_P")

        expected_results = {
            "BayXShaGeno": "Geno",
            "642": "Publish"
        }
        self.assertEqual(results, expected_results)

    def test_fetch_dataset_sample_id(self):
        """Test getting sample ids from the database for a sample list.

        The mocked cursor returns (name, id) pairs; the result is expected
        to be the corresponding name -> id mapping.
        """
        expected_results = {"B6D2F1": 1, "BXD1": 4, "BXD11": 10,
                            "BXD12": 11, "BXD13": 12, "BXD15": 14, "BXD16": 15}
        database_instance = mock.Mock()
        database_cursor = mock.Mock()
        database_cursor.execute.return_value = 5
        database_cursor.fetchall.return_value = list(expected_results.items())
        database_instance.cursor.return_value = database_cursor
        strain_list = ["B6D2F1", "BXD1", "BXD11",
                       "BXD12", "BXD13", "BXD16", "BXD15"]

        results = fetch_dataset_sample_id(
            samplelist=strain_list, database=database_instance, species="mouse")

        self.assertEqual(results, expected_results)

    # Stacked patches are applied bottom-up, so the innermost decorator
    # (divide_into_chunks) supplies the first mock argument.
    @mock.patch("gn3.computations.datasets.fetch_from_db_sample_data")
    @mock.patch("gn3.computations.datasets.divide_into_chunks")
    def test_get_traits_data(self, mock_divide_into_chunks, mock_fetch_samples):
        """Test the function that gets the data of the traits in a dataset.

        Chunking and the database fetch are both patched. For now the test
        only pins the current outcome, an empty mapping.
        """
        # TODO: more tests needed for this. `_expected_results` is not
        # asserted yet; it is kept as a record of the intended outcome.
        _expected_results = {'AT_DSAFDS': [
            12, 14, 13, 23, 12, 14, 13, 23, 12, 14, 13, 23]}
        database = mock.Mock()
        sample_id = [1, 2, 7, 3, 22, 8]
        mock_divide_into_chunks.return_value = [
            [1, 2, 7], [3, 22, 8], [5, 22, 333]]
        mock_fetch_samples.return_value = ("AT_DSAFDS", 12, 14, 13, 23)

        results = get_traits_data(sample_id, database, "HC_M2", "Publish")

        self.assertEqual(dict(results), {})

    def test_divide_into_chunks(self):
        """Test dividing a list into chunks.

        Nine items split with an argument of 3 must give three lists of
        three items each, in the original order.
        """
        results = divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3)
        expected_results = [[1, 2, 7], [3, 22, 8], [5, 22, 333]]

        self.assertEqual(results, expected_results)
|