From ffeb3a61f770a78ad5b47e5587bae5a9bcb2f77d Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 8 Jun 2021 01:40:47 +0300 Subject: initial commit for queries perfomance tests --- tests/performance/__init__.py | 0 tests/performance/test_query.py | 119 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 tests/performance/__init__.py create mode 100644 tests/performance/test_query.py diff --git a/tests/performance/__init__.py b/tests/performance/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/performance/test_query.py b/tests/performance/test_query.py new file mode 100644 index 0000000..c0a3210 --- /dev/null +++ b/tests/performance/test_query.py @@ -0,0 +1,119 @@ +"""module contains performance tests for queries""" + +import time +import sys + +from inspect import getmembers +from inspect import isfunction + +from functools import wraps +from gn3.db_utils import database_connector + + +def timer(func): + """time function""" + @wraps(func) + def wrapper_time(*args, **kwargs): + """time wrapper""" + start_time = time.perf_counter() + results = func(*args, **kwargs) + end_time = time.perf_counter() + run_time = end_time - start_time + print(f"the time taken is {run_time:.3f} seconds") + return results + + return wrapper_time + + +def query_executor(query, fetch_all=True): + """function to execute a query""" + conn, _ = database_connector() + + with conn: + cursor = conn.cursor() + cursor.execute(query) + + if fetch_all: + return cursor.fetchall() + return cursor.fetchone() + + +def fetch_probeset_query(dataset_name): + """contains queries for datasets""" + + query = """SELECT * from ProbeSetData + where StrainID in (4, 5, 6, 7, 8, 9, 10, 11, 12, + 14, 15, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, + 29, 30, 31, 35, 36, 37, 39, 98, 99, 100, 103, + 487, 105, 106, 110, 115,116, 117, 118, 119, + 120, 919, 147, 121, 40, 41, 124, 125, 128, 135, + 129, 130, 131, 132, 134, 138, 139, 140, 141, 142, + 144, 145, 148, 149, 920, 922, 2, 3, 1, 1100) + and id in (SELECT ProbeSetXRef.DataId + FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) + WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id + and ProbeSetFreeze.Name = '{}' + and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(dataset_name) + + return query + + +@timer +def perf_simple_query(): + """initial simple query test""" + + query = """select * from ProbeSetData limit 1""" + + _results = query_executor(query) + + return {} + + +@timer +def perf_hc_m2_dataset(): + """test the default dataset HC_M2_0606_P""" + + query = fetch_probeset_query("HC_M2_0606_P") + + _results = query_executor(query) + + return {} + + +@timer +def perf_umutaffyexon_dataset(): + """largest dataset in gn""" + + query = fetch_probeset_query("UMUTAffyExon_0209_RMA") + _results = query_executor(query) + return {} + + +def fetch_perf_functions(): + """function to filter all functions strwith perf_""" + name_func_dict = {name: obj for name, obj in + getmembers(sys.modules[__name__], isfunction)if isfunction( + obj) and obj.__module__ == __name__ and name.startswith('perf_')} + + return name_func_dict + + +def fetch_cmd_args(): + """function to fetch cmd args""" + cmd_args = sys.argv[1:] + + name_func_dict = fetch_perf_functions() + + if len(cmd_args) > 0: + callables = [func_call for name, + func_call in name_func_dict.items() if name in cmd_args] + + return callables + + return list(name_func_dict.values()) + + +if __name__ == '__main__': + func_list = fetch_cmd_args() + for call_func in func_list: + call_func() -- cgit v1.2.3 From d15cbf953bd3b3e9260e88d5663a1cf20414d776 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 8 Jun 2021 03:00:35 +0300 Subject: add types and dataset names --- tests/performance/test_query.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/performance/test_query.py b/tests/performance/test_query.py index c0a3210..222e8d6 100644 --- a/tests/performance/test_query.py +++ b/tests/performance/test_query.py @@ -6,6 +6,7 @@ import sys from inspect import getmembers from inspect import isfunction +from typing import Optional from functools import wraps from gn3.db_utils import database_connector @@ -25,9 +26,12 @@ def timer(func): return wrapper_time -def query_executor(query, fetch_all=True): +def query_executor(query: str, + dataset_name: Optional[str] = "dataset_name", + fetch_all: bool = True): """function to execute a query""" conn, _ = database_connector() + print(f"Performance tests for {dataset_name}") with conn: cursor = conn.cursor() @@ -38,7 +42,7 @@ def query_executor(query, fetch_all=True): return cursor.fetchone() -def fetch_probeset_query(dataset_name): +def fetch_probeset_query(dataset_name: str): """contains queries for datasets""" query = """SELECT * from ProbeSetData @@ -75,7 +79,7 @@ def perf_hc_m2_dataset(): query = fetch_probeset_query("HC_M2_0606_P") - _results = query_executor(query) + _results = query_executor(query, "HC_M2_0606_P") return {} @@ -85,21 +89,24 @@ def perf_umutaffyexon_dataset(): """largest dataset in gn""" query = fetch_probeset_query("UMUTAffyExon_0209_RMA") - _results = query_executor(query) + _results = query_executor(query, "UMUTAffyExon_0209_RMA") return {} def fetch_perf_functions(): """function to filter all functions strwith perf_""" - name_func_dict = {name: obj for name, obj in + name_func_dict = {name: func_obj for name, func_obj in getmembers(sys.modules[__name__], isfunction)if isfunction( - obj) and obj.__module__ == __name__ and name.startswith('perf_')} + func_obj) + and func_obj.__module__ == __name__ and name.startswith('perf_')} return name_func_dict def fetch_cmd_args(): - """function to fetch cmd args""" + """function to fetch cmd args\ + for example python file.py perf_hc_m2_dataset\ + output [perf_hc_m2_dataset obj]""" cmd_args = sys.argv[1:] name_func_dict = fetch_perf_functions() @@ -115,5 +122,5 @@ def fetch_cmd_args(): if __name__ == '__main__': func_list = fetch_cmd_args() - for call_func in func_list: - call_func() + for func in func_list: + func() -- cgit v1.2.3 From 348f48b68eb029f9b3f74c9b14de508f643ce3fe Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 8 Jun 2021 03:40:18 +0300 Subject: add tests to setup packages --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index e43ab0a..3f0922b 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ setup(author='Bonface M. K.', 'gn3.api', 'gn3.computations', 'gn3.db', + 'tests' ], url='https://github.com/genenetwork/genenetwork3', version='0.1') -- cgit v1.2.3 From 2431d8538625da97b07571c61f0d0dd29c5d880a Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 8 Jun 2021 03:48:04 +0300 Subject: pep8 formatting --- tests/performance/test_query.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/performance/test_query.py b/tests/performance/test_query.py index 222e8d6..2d05d26 100644 --- a/tests/performance/test_query.py +++ b/tests/performance/test_query.py @@ -31,7 +31,7 @@ def query_executor(query: str, fetch_all: bool = True): """function to execute a query""" conn, _ = database_connector() - print(f"Performance tests for {dataset_name}") + print(f"Performance test for {dataset_name}") with conn: cursor = conn.cursor() @@ -122,5 +122,5 @@ def fetch_cmd_args(): if __name__ == '__main__': func_list = fetch_cmd_args() - for func in func_list: - func() + for func_obj in func_list: + func_obj() -- cgit v1.2.3 From 72b3396ac8ccb4023122bd66d93c7cc63b4b6e6f Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 9 Jun 2021 20:23:58 +0300 Subject: rename perf query file --- tests/performance/test_query.py | 126 ---------------------------------------- 1 file changed, 126 deletions(-) delete mode 100644 tests/performance/test_query.py diff --git a/tests/performance/test_query.py b/tests/performance/test_query.py deleted file mode 100644 index 2d05d26..0000000 --- a/tests/performance/test_query.py +++ /dev/null @@ -1,126 +0,0 @@ -"""module contains performance tests for queries""" - -import time -import sys - -from inspect import getmembers -from inspect import isfunction - -from typing import Optional -from functools import wraps -from gn3.db_utils import database_connector - - -def timer(func): - """time function""" - @wraps(func) - def wrapper_time(*args, **kwargs): - """time wrapper""" - start_time = time.perf_counter() - results = func(*args, **kwargs) - end_time = time.perf_counter() - run_time = end_time - start_time - print(f"the time taken is {run_time:.3f} seconds") - return results - - return wrapper_time - - -def query_executor(query: str, - dataset_name: Optional[str] = "dataset_name", - fetch_all: bool = True): - """function to execute a query""" - conn, _ = database_connector() - print(f"Performance test for {dataset_name}") - - with conn: - cursor = conn.cursor() - cursor.execute(query) - - if fetch_all: - return cursor.fetchall() - return cursor.fetchone() - - -def fetch_probeset_query(dataset_name: str): - """contains queries for datasets""" - - query = """SELECT * from ProbeSetData - where StrainID in (4, 5, 6, 7, 8, 9, 10, 11, 12, - 14, 15, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, - 29, 30, 31, 35, 36, 37, 39, 98, 99, 100, 103, - 487, 105, 106, 110, 115,116, 117, 118, 119, - 120, 919, 147, 121, 40, 41, 124, 125, 128, 135, - 129, 130, 131, 132, 134, 138, 139, 140, 141, 142, - 144, 145, 148, 149, 920, 922, 2, 3, 1, 1100) - and id in (SELECT ProbeSetXRef.DataId - FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) - WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id - and ProbeSetFreeze.Name = '{}' - and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(dataset_name) - - return query - - -@timer -def perf_simple_query(): - """initial simple query test""" - - query = """select * from ProbeSetData limit 1""" - - _results = query_executor(query) - - return {} - - -@timer -def perf_hc_m2_dataset(): - """test the default dataset HC_M2_0606_P""" - - query = fetch_probeset_query("HC_M2_0606_P") - - _results = query_executor(query, "HC_M2_0606_P") - - return {} - - -@timer -def perf_umutaffyexon_dataset(): - """largest dataset in gn""" - - query = fetch_probeset_query("UMUTAffyExon_0209_RMA") - _results = query_executor(query, "UMUTAffyExon_0209_RMA") - return {} - - -def fetch_perf_functions(): - """function to filter all functions strwith perf_""" - name_func_dict = {name: func_obj for name, func_obj in - getmembers(sys.modules[__name__], isfunction)if isfunction( - func_obj) - and func_obj.__module__ == __name__ and name.startswith('perf_')} - - return name_func_dict - - -def fetch_cmd_args(): - """function to fetch cmd args\ - for example python file.py perf_hc_m2_dataset\ - output [perf_hc_m2_dataset obj]""" - cmd_args = sys.argv[1:] - - name_func_dict = fetch_perf_functions() - - if len(cmd_args) > 0: - callables = [func_call for name, - func_call in name_func_dict.items() if name in cmd_args] - - return callables - - return list(name_func_dict.values()) - - -if __name__ == '__main__': - func_list = fetch_cmd_args() - for func_obj in func_list: - func_obj() -- cgit v1.2.3 From 16fc8e6be584299eb839bf1be54d7ee472d2321b Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 9 Jun 2021 20:24:36 +0300 Subject: refactor perf query functions --- tests/performance/perf_query.py | 113 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 tests/performance/perf_query.py diff --git a/tests/performance/perf_query.py b/tests/performance/perf_query.py new file mode 100644 index 0000000..594b9ea --- /dev/null +++ b/tests/performance/perf_query.py @@ -0,0 +1,113 @@ +"""module contains performance tests for queries""" + +import time +import sys + +from inspect import getmembers +from inspect import isfunction + +from typing import Optional +from functools import wraps +from gn3.db_utils import database_connector + + +def timer(func): + """time function""" + @wraps(func) + def wrapper_time(*args, **kwargs): + """time wrapper""" + start_time = time.perf_counter() + results = func(*args, **kwargs) + end_time = time.perf_counter() + run_time = end_time - start_time + print(f"the time taken is {run_time:.3f} seconds") + return results + + return wrapper_time + + +def query_executor(query: str, + dataset_name: Optional[str] = "dataset_name", + fetch_all: bool = True): + """function to execute a query""" + conn, _ = database_connector() + + with conn: + cursor = conn.cursor() + cursor.execute(query) + + if fetch_all: + return cursor.fetchall() + return cursor.fetchone() + + +def fetch_probeset_query(dataset_name: str): + """contains queries for datasets""" + + query = """SELECT * from ProbeSetData + where StrainID in (4, 5, 6, 7, 8, 9, 10, 11, 12, + 14, 15, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, + 29, 30, 31, 35, 36, 37, 39, 98, 99, 100, 103, + 487, 105, 106, 110, 115,116, 117, 118, 119, + 120, 919, 147, 121, 40, 41, 124, 125, 128, 135, + 129, 130, 131, 132, 134, 138, 139, 140, 141, 142, + 144, 145, 148, 149, 920, 922, 2, 3, 1, 1100) + and id in (SELECT ProbeSetXRef.DataId + FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) + WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id + and ProbeSetFreeze.Name = '{}' + and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(dataset_name) + + return query + + +@timer +def perf_hc_m2_dataset(): + """test the default dataset HC_M2_0606_P""" + + dataset_name = "HC_M2_0606_P" + print(f"Performance test for {dataset_name}") + + query_executor(fetch_probeset_query(dataset_name=dataset_name), + dataset_name=dataset_name) + + +@timer +def perf_umutaffyexon_dataset(): + """largest dataset in gn""" + + dataset_name = "UMUTAffyExon_0209_RMA" + print(f"Performance test for {dataset_name}") + query_executor(fetch_probeset_query(dataset_name=dataset_name), + dataset_name=dataset_name) + + +def fetch_perf_functions(): + """function to filter all functions strwith perf_""" + name_func_dict = {name: func_obj for name, func_obj in + getmembers(sys.modules[__name__], isfunction)if isfunction( + func_obj) + and func_obj.__module__ == __name__ and name.startswith('perf_')} + + return name_func_dict + + +def fetch_cmd_args(): + """function to fetch cmd args""" + cmd_args = sys.argv[1:] + + name_func_dict = fetch_perf_functions() + + if len(cmd_args) > 0: + callables = [func_call for name, + func_call in name_func_dict.items() if name in cmd_args] + + return callables + + return list(name_func_dict.values()) + + +if __name__ == '__main__': + func_list = fetch_cmd_args() + for func_obj in func_list: + func_obj() -- cgit v1.2.3 From c361cc5c16914e43689db8b7f723c32986e4cc20 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 9 Jun 2021 20:27:20 +0300 Subject: remove ununsed variables --- tests/performance/perf_query.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/performance/perf_query.py b/tests/performance/perf_query.py index 594b9ea..93bb3b2 100644 --- a/tests/performance/perf_query.py +++ b/tests/performance/perf_query.py @@ -27,7 +27,6 @@ def timer(func): def query_executor(query: str, - dataset_name: Optional[str] = "dataset_name", fetch_all: bool = True): """function to execute a query""" conn, _ = database_connector() -- cgit v1.2.3 From 9d6af6049fa73c6aae4d224245d319e87bccbd6a Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 9 Jun 2021 20:31:43 +0300 Subject: minor fixes --- tests/performance/perf_query.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/performance/perf_query.py b/tests/performance/perf_query.py index 93bb3b2..12cb944 100644 --- a/tests/performance/perf_query.py +++ b/tests/performance/perf_query.py @@ -6,7 +6,6 @@ import sys from inspect import getmembers from inspect import isfunction -from typing import Optional from functools import wraps from gn3.db_utils import database_connector @@ -67,8 +66,7 @@ def perf_hc_m2_dataset(): dataset_name = "HC_M2_0606_P" print(f"Performance test for {dataset_name}") - query_executor(fetch_probeset_query(dataset_name=dataset_name), - dataset_name=dataset_name) + query_executor(fetch_probeset_query(dataset_name=dataset_name)) @timer @@ -77,8 +75,7 @@ def perf_umutaffyexon_dataset(): dataset_name = "UMUTAffyExon_0209_RMA" print(f"Performance test for {dataset_name}") - query_executor(fetch_probeset_query(dataset_name=dataset_name), - dataset_name=dataset_name) + query_executor(fetch_probeset_query(dataset_name=dataset_name)) def fetch_perf_functions(): -- cgit v1.2.3