From 2236508f24c8c266a6a9dff28a1307bb3d0dd31e Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 5 Oct 2021 11:07:09 +0300 Subject: tests: test_run_mapping: Add missing positional argument "vals_hash" --- wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py index c220a072..31f56c07 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py @@ -181,7 +181,8 @@ class TestRunMapping(unittest.TestCase): with mock.patch("wqflask.marker_regression.run_mapping.datetime.datetime", new=datetime_mock): export_mapping_results(dataset=self.dataset, trait=self.trait, markers=markers, results_path="~/results", mapping_scale="physic", score_type="-log(p)", - transform="qnorm", covariates="Dataset1:Trait1,Dataset2:Trait2", n_samples="100") + transform="qnorm", covariates="Dataset1:Trait1,Dataset2:Trait2", n_samples="100", + vals_hash="") write_calls = [ mock.call('Time/Date: 09/01/19 / 10:12:12\n'), -- cgit v1.2.3 From 321632bf70c72ed987ba7c4f605dd92deaa62380 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 5 Oct 2021 11:10:04 +0300 Subject: tests: test_run_mapping: Add missing attribute to "self.trait" --- wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py index 31f56c07..26903eed 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py @@ -47,7 +47,7 @@ class TestRunMapping(unittest.TestCase): self.chromosomes = AttributeSetter({"chromosomes": chromosomes}) self.trait = 
AttributeSetter( - {"symbol": "IGFI", "chr": "X1", "mb": 123313}) + {"symbol": "IGFI", "chr": "X1", "mb": 123313, "display_name": "Test Name"}) def tearDown(self): self.dataset = AttributeSetter( -- cgit v1.2.3 From e97719eea27ede1b3e943e6d21d18e5d383679f0 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 5 Oct 2021 11:12:29 +0300 Subject: tests: test_run_mapping: Fix failing assert when getting perm strata --- wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py index 26903eed..3e9e4ef3 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py @@ -233,25 +233,20 @@ class TestRunMapping(unittest.TestCase): "c1": "c1_value", "c2": "c2_value", "w1": "w1_value" - }, "S2": { "w1": "w2_value", "w2": "w2_value" - }, "S3": { "c1": "c1_value", "c2": "c2_value" - }, - }}) - results = get_perm_strata(this_trait={}, sample_list=sample_list, categorical_vars=categorical_vars, used_samples=used_samples) - self.assertEqual(results, [2, 1]) + self.assertEqual(results, [1, 1]) def test_get_chr_length(self): """test for getting chromosome length""" -- cgit v1.2.3 From 1f5be42d8f090fd4fe77a9275b12a9c9b1383d09 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 5 Oct 2021 11:19:52 +0300 Subject: tests: test_run_mapping: Add missing calls After adding the missing attribute to "self.trait", we also need to update some missing calls. 
--- wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py index 3e9e4ef3..3747aeb8 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py @@ -188,6 +188,8 @@ class TestRunMapping(unittest.TestCase): mock.call('Time/Date: 09/01/19 / 10:12:12\n'), mock.call('Population: Human GP1_\n'), mock.call( 'Data Set: dataser_1\n'), + mock.call('Trait: Test Name\n'), + mock.call('Trait Hash: \n'), mock.call('N Samples: 100\n'), mock.call( 'Transform - Quantile Normalized\n'), mock.call('Gene Symbol: IGFI\n'), mock.call( -- cgit v1.2.3 From a4ec2505fb9dd6f0480c9b452fd623e038c07939 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 5 Oct 2021 11:28:33 +0300 Subject: tests: test_run_mapping: Add missing "name" attribute to tests --- .../unit/wqflask/marker_regression/test_gemma_mapping.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py index 4003d68f..58a44b2a 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py @@ -81,10 +81,12 @@ class TestGemmaMapping(unittest.TestCase): def test_gen_pheno_txt_file(self): """add tests for generating pheno txt file""" with mock.patch("builtins.open", mock.mock_open())as mock_open: - gen_pheno_txt_file(this_dataset={}, genofile_name="", vals=[ - "x", "w", "q", "we", "R"], trait_filename="fitr.re") + gen_pheno_txt_file( + this_dataset=AttributeSetter({"name": "A"}), + genofile_name="", vals=[ + "x", "w", "q", "we", "R"]) mock_open.assert_called_once_with( - '/home/user/data/gn2/fitr.re.txt', 'w') + 
'/home/user/data/gn2/PHENO_KiAEKlCvM6iGTM9Kh_TAlQ.txt', 'w') filehandler = mock_open() values = ["x", "w", "q", "we", "R"] write_calls = [mock.call('NA\n'), mock.call('w\n'), mock.call( @@ -112,7 +114,7 @@ class TestGemmaMapping(unittest.TestCase): create_trait.side_effect = create_trait_side_effect group = MockGroup({"name": "group_X", "samplelist": samplelist}) - this_dataset = AttributeSetter({"group": group}) + this_dataset = AttributeSetter({"group": group, "name": "A"}) flat_files.return_value = "Home/Genenetwork" with mock.patch("builtins.open", mock.mock_open())as mock_open: @@ -132,7 +134,7 @@ class TestGemmaMapping(unittest.TestCase): flat_files.assert_called_once_with('mapping') mock_open.assert_called_once_with( - 'Home/Genenetwork/group_X_covariates.txt', 'w') + 'Home/Genenetwork/COVAR_anFZ_LfZYV0Ulywo+7tRCw.txt', 'w') filehandler = mock_open() filehandler.write.assert_has_calls([mock.call( '-9\t'), mock.call('-9\t'), mock.call('-9\t'), mock.call('-9\t'), mock.call('\n')]) -- cgit v1.2.3 From 7a15d24a6598f30801dd897ddc72d3773641e7bd Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 5 Oct 2021 11:32:17 +0300 Subject: doc: docker-container.org: Remove python2 gn2 docker set-up command --- doc/docker-container.org | 7 ------- 1 file changed, 7 deletions(-) diff --git a/doc/docker-container.org b/doc/docker-container.org index ef0d71fc..79b8272f 100644 --- a/doc/docker-container.org +++ b/doc/docker-container.org @@ -28,13 +28,6 @@ which will be added to a base mariaDB image. 
First create the gn2 tar archive by running: #+begin_src sh -# For the Python 2 version: -env GUIX_PACKAGE_PATH="/home/bonface/projects/guix-bioinformatics:/home/bonface/projects/guix-past/modules" \ - ./pre-inst-env guix pack --no-grafts\ - -S /gn2-profile=/ \ - screen python2-genenetwork2 - -# For the Python 3 version: env GUIX_PACKAGE_PATH="/home/bonface/projects/guix-bioinformatics:/home/bonface/projects/guix-past/modules" \ ./pre-inst-env guix pack --no-grafts\ -S /gn2-profile=/ \ -- cgit v1.2.3 From 949789a00d8e6e901cc18b939737cd42e14c0236 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 6 Oct 2021 16:12:22 +0300 Subject: scripts: group: Use a unique key to identify a group --- scripts/authentication/group.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/scripts/authentication/group.py b/scripts/authentication/group.py index 76c7fb4f..eea13efe 100644 --- a/scripts/authentication/group.py +++ b/scripts/authentication/group.py @@ -29,6 +29,7 @@ import argparse import datetime import redis import json +import uuid from typing import Dict, List, Optional, Set @@ -71,26 +72,31 @@ def create_group_data(users: Dict, target_group: str, me@test2.com, me@test3.com" """ + # Emails + _members: Set = set("".join(members.split()).split(",") + if members else []) + _admins: Set = set("".join(admins.split()).split(",") + if admins else []) - _members: List = "".join(members.split()).split(",") if members else [] - _admins: List = "".join(admins.split()).split(",") if admins else [] + # Unique IDs + member_ids: Set = set() + admin_ids: Set = set() - user_ids: Dict = dict() for user_id, user_details in users.items(): _details = json.loads(user_details) - if _details.get("email_address"): - user_ids[_details.get("email_address")] = user_id - print(user_ids) + if _details.get("email_address") in _members: + member_ids.add(user_id) + if _details.get("email_address") in _admins: + admin_ids.add(user_id) + + timestamp: str 
= datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') return {"key": "groups", - "field": target_group, + "field": str(uuid.uuid4()), "value": json.dumps({ - "id": target_group, "name": target_group, - "admins": [user_ids[admin] for admin in _admins - if admin in user_ids], - "members": [user_ids[member] for member in _members - if member in user_ids], - "changed_timestamp": datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + "admins": list(admin_ids), + "members": list(member_ids), + "changed_timestamp": timestamp, })} -- cgit v1.2.3 From 931c7eb07cc995118ba808df760fd74de036853f Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 6 Oct 2021 16:15:31 +0300 Subject: scripts: group: Remove unused import --- scripts/authentication/group.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/authentication/group.py b/scripts/authentication/group.py index eea13efe..d3f9a1e4 100644 --- a/scripts/authentication/group.py +++ b/scripts/authentication/group.py @@ -31,7 +31,8 @@ import redis import json import uuid -from typing import Dict, List, Optional, Set +from typing import Dict, Optional, Set + def create_group_data(users: Dict, target_group: str, members: Optional[str] = None, -- cgit v1.2.3 From 870edaf2cf8ce8588ee7c58d08fc1f307f7198ec Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 6 Oct 2021 16:19:04 +0300 Subject: scripts: group: Remove empty `""` value for data.get data.get("field") will default to None if there is no value; and None is falsy. 
--- scripts/authentication/group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/authentication/group.py b/scripts/authentication/group.py index d3f9a1e4..1919d9db 100644 --- a/scripts/authentication/group.py +++ b/scripts/authentication/group.py @@ -131,10 +131,10 @@ if __name__ == "__main__": members=members, admins=admins) - if not REDIS_CONN.hget("groups", data.get("field", "")): updated_data = json.loads(data["value"]) updated_data["created_timestamp"] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') data["value"] = json.dumps(updated_data) + if not REDIS_CONN.hget("groups", data.get("field")): created_p = REDIS_CONN.hset(data.get("key", ""), data.get("field", ""), -- cgit v1.2.3 From c1b23a1b01071c252ddae6dbea14500e4c248d84 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 6 Oct 2021 21:01:54 +0300 Subject: workflows: main.yml: Disable link checking --- .github/workflows/main.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f279a7e5..8e2c7966 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -55,11 +55,11 @@ jobs: GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ etc/default_settings.py -c -m unittest discover -v - - name: Test for Broken Links - run: | - env GN2_PROFILE=/gn2-profile \ - TMPDIR=/tmp\ - WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \ - GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ - etc/default_settings.py -c \ - $PWD/test/requests/links_scraper/genelinks.py + # - name: Test for Broken Links + # run: | + # env GN2_PROFILE=/gn2-profile \ + # TMPDIR=/tmp\ + # WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \ + # GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ + # etc/default_settings.py -c \ + # $PWD/test/requests/links_scraper/genelinks.py -- cgit v1.2.3 From 40dddc1a78a7808b480d26594ced689cdcc08c24 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 6 Oct 2021 21:23:08 +0300 
Subject: scripts: group: Fix indentation --- scripts/authentication/group.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/authentication/group.py b/scripts/authentication/group.py index 1919d9db..7e73be15 100644 --- a/scripts/authentication/group.py +++ b/scripts/authentication/group.py @@ -131,10 +131,10 @@ if __name__ == "__main__": members=members, admins=admins) - updated_data = json.loads(data["value"]) - updated_data["created_timestamp"] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') - data["value"] = json.dumps(updated_data) if not REDIS_CONN.hget("groups", data.get("field")): + updated_data = json.loads(data["value"]) + updated_data["created_timestamp"] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + data["value"] = json.dumps(updated_data) created_p = REDIS_CONN.hset(data.get("key", ""), data.get("field", ""), -- cgit v1.2.3 From 67222a6cb11995eb5a4af58f63cc9385ccfb9226 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 6 Oct 2021 21:24:16 +0300 Subject: scripts: group: Break up long line --- scripts/authentication/group.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/authentication/group.py b/scripts/authentication/group.py index 7e73be15..ed17f260 100644 --- a/scripts/authentication/group.py +++ b/scripts/authentication/group.py @@ -133,7 +133,8 @@ if __name__ == "__main__": if not REDIS_CONN.hget("groups", data.get("field")): updated_data = json.loads(data["value"]) - updated_data["created_timestamp"] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_data["created_timestamp"] = timestamp data["value"] = json.dumps(updated_data) created_p = REDIS_CONN.hset(data.get("key", ""), -- cgit v1.2.3 From dc378d26c003a8f0503ad69235d1685d66e4d611 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 6 Oct 2021 21:26:03 +0300 Subject: scripts: group: Update docstrings for 
"create_group_data" --- scripts/authentication/group.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/authentication/group.py b/scripts/authentication/group.py index ed17f260..08a4a2bc 100644 --- a/scripts/authentication/group.py +++ b/scripts/authentication/group.py @@ -41,6 +41,9 @@ def create_group_data(users: Dict, target_group: str, "field", and "value" that can be used in a redis hash as follows: HSET key field value + The "field" return value is a unique-id that is used to + distinguish the groups. + Parameters: - `users`: a list of users for example: -- cgit v1.2.3 From d5f6670836cbed804a00e02ec0258d0c87564006 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 6 Oct 2021 21:40:35 +0300 Subject: scripts: group: Replace args.group_name with data["field"] --- scripts/authentication/group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/authentication/group.py b/scripts/authentication/group.py index 08a4a2bc..c8c2caad 100644 --- a/scripts/authentication/group.py +++ b/scripts/authentication/group.py @@ -145,7 +145,7 @@ if __name__ == "__main__": data.get("value", "")) groups = json.loads(REDIS_CONN.hget("groups", - args.group_name)) # type: ignore + data.get("field"))) # type: ignore if created_p: exit(f"\nSuccessfully created the group: '{args.group_name}'\n" f"`HGETALL groups {args.group_name}`: {groups}\n") -- cgit v1.2.3 From 70f8ed53f85cfb42ca81ed6c3b4c9cf1060940e5 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 6 Oct 2021 21:44:51 +0300 Subject: scripts: resource: Add option for specifying a groups uuid --- scripts/authentication/resource.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/authentication/resource.py b/scripts/authentication/resource.py index 8fcf09d7..4996f34c 100644 --- a/scripts/authentication/resource.py +++ b/scripts/authentication/resource.py @@ -63,12 +63,16 @@ def recover_hash(name: str, file_path: str, set_function) -> bool: if __name__ == 
"__main__": # Initialising the parser CLI arguments parser = argparse.ArgumentParser() + parser.add_argument("--group-id", + help="Add the group id to all resources") parser.add_argument("--restore", help="Restore from a given backup") parser.add_argument("--enable-backup", action="store_true", help="Create a back up before edits") args = parser.parse_args() + if not args.group_id: + exit("Please specify the group-id!\n") if args.restore: if recover_hash(name="resources", file_path=args.back_up, @@ -92,8 +96,8 @@ if __name__ == "__main__": for resource_id, resource in RESOURCES.items(): _resource = json.loads(resource) # str -> dict conversion - _resource["group_masks"] = {"editors": {"metadata": "edit", - "data": "edit"}} + _resource["group_masks"] = {args.group_id: {"metadata": "edit", + "data": "edit"}} REDIS_CONN.hset("resources", resource_id, json.dumps(_resource)) -- cgit v1.2.3 From 7805a48172ada364d3783db043dbcf637445a7fe Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 8 Oct 2021 22:07:43 +0000 Subject: Adding convert_dol_genotypes.py to scripts; everything is hard-coded in it since I was only writing it to generate a specific file and it probably won't be re-used --- scripts/convert_dol_genotypes.py | 68 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 scripts/convert_dol_genotypes.py diff --git a/scripts/convert_dol_genotypes.py b/scripts/convert_dol_genotypes.py new file mode 100644 index 00000000..353f1b53 --- /dev/null +++ b/scripts/convert_dol_genotypes.py @@ -0,0 +1,68 @@ +# This is just to convert the Rqtl2 format genotype files for DOL into a .geno file +# Everything is hard-coded since I doubt this will be re-used and I just wanted to generate the file quickly + +import os + +geno_dir = "/home/zas1024/gn2-zach/DO_genotypes/" +markers_file = "/home/zas1024/gn2-zach/DO_genotypes/SNP_Map.txt" +gn_geno_path = "/home/zas1024/gn2-zach/DO_genotypes/DOL.geno" + +marker_data = {} +with open(markers_file, "r") as 
markers_fh: + for i, line in enumerate(markers_fh): + if i == 0: + continue + else: + line_items = line.split("\t") + this_marker = {} + this_marker['chr'] = line_items[2] if line_items[2] != "0" else "M" + this_marker['pos'] = f'{float(line_items[3])/1000000:.6f}' + marker_data[line_items[1]] = this_marker + +sample_names = [] +for filename in os.listdir(geno_dir): + if "gm4qtl2_geno" in filename: + with open(geno_dir + "/" + filename, "r") as rqtl_geno_fh: + for i, line in enumerate(rqtl_geno_fh): + line_items = line.split(",") + if i < 3: + continue + elif not len(sample_names) and i == 3: + sample_names = [item.replace("TLB", "TB") for item in line_items[1:]] + elif i > 3: + marker_data[line_items[0]]['genotypes'] = ["X" if item.strip() == "-" else item.strip() for item in line_items[1:]] + +def sort_func(e): + try: + return int(e['chr']) + except: + if e['chr'] == "X": + return 20 + elif e['chr'] == "Y": + return 21 + elif e['chr'] == "M": + return 22 + +marker_list = [] +for key, value in marker_data.items(): + if 'genotypes' in value: + this_marker = { + 'chr': value['chr'], + 'locus': key, + 'pos': value['pos'], + 'genotypes': value['genotypes'] + } + marker_list.append(this_marker) + +marker_list.sort(key=sort_func) + +with open(gn_geno_path, "w") as gn_geno_fh: + gn_geno_fh.write("\t".join((["Chr", "Locus", "cM", "Mb"] + sample_names))) + for marker in marker_list: + row_contents = [ + marker['chr'], + marker['locus'], + marker['pos'], + marker['pos'] + ] + marker['genotypes'] + gn_geno_fh.write("\t".join(row_contents) + "\n") -- cgit v1.2.3 From b37a9c6c495d142852d0cee54d83f5c9e815e37b Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 8 Oct 2021 22:19:07 +0000 Subject: Fixed the sort to account for both chr and pos in a kind of hack-y way + added some comments + changed EOL to LF because the file suddenly started including EOL characters --- scripts/convert_dol_genotypes.py | 142 ++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 68 
deletions(-) diff --git a/scripts/convert_dol_genotypes.py b/scripts/convert_dol_genotypes.py index 353f1b53..81b3bd6d 100644 --- a/scripts/convert_dol_genotypes.py +++ b/scripts/convert_dol_genotypes.py @@ -1,68 +1,74 @@ -# This is just to convert the Rqtl2 format genotype files for DOL into a .geno file -# Everything is hard-coded since I doubt this will be re-used and I just wanted to generate the file quickly - -import os - -geno_dir = "/home/zas1024/gn2-zach/DO_genotypes/" -markers_file = "/home/zas1024/gn2-zach/DO_genotypes/SNP_Map.txt" -gn_geno_path = "/home/zas1024/gn2-zach/DO_genotypes/DOL.geno" - -marker_data = {} -with open(markers_file, "r") as markers_fh: - for i, line in enumerate(markers_fh): - if i == 0: - continue - else: - line_items = line.split("\t") - this_marker = {} - this_marker['chr'] = line_items[2] if line_items[2] != "0" else "M" - this_marker['pos'] = f'{float(line_items[3])/1000000:.6f}' - marker_data[line_items[1]] = this_marker - -sample_names = [] -for filename in os.listdir(geno_dir): - if "gm4qtl2_geno" in filename: - with open(geno_dir + "/" + filename, "r") as rqtl_geno_fh: - for i, line in enumerate(rqtl_geno_fh): - line_items = line.split(",") - if i < 3: - continue - elif not len(sample_names) and i == 3: - sample_names = [item.replace("TLB", "TB") for item in line_items[1:]] - elif i > 3: - marker_data[line_items[0]]['genotypes'] = ["X" if item.strip() == "-" else item.strip() for item in line_items[1:]] - -def sort_func(e): - try: - return int(e['chr']) - except: - if e['chr'] == "X": - return 20 - elif e['chr'] == "Y": - return 21 - elif e['chr'] == "M": - return 22 - -marker_list = [] -for key, value in marker_data.items(): - if 'genotypes' in value: - this_marker = { - 'chr': value['chr'], - 'locus': key, - 'pos': value['pos'], - 'genotypes': value['genotypes'] - } - marker_list.append(this_marker) - -marker_list.sort(key=sort_func) - -with open(gn_geno_path, "w") as gn_geno_fh: - gn_geno_fh.write("\t".join((["Chr", 
"Locus", "cM", "Mb"] + sample_names))) - for marker in marker_list: - row_contents = [ - marker['chr'], - marker['locus'], - marker['pos'], - marker['pos'] - ] + marker['genotypes'] - gn_geno_fh.write("\t".join(row_contents) + "\n") +# This is just to convert the Rqtl2 format genotype files for DOL into a .geno file +# Everything is hard-coded since I doubt this will be re-used and I just wanted to generate the file quickly + +import os + +geno_dir = "/home/zas1024/gn2-zach/DO_genotypes/" +markers_file = "/home/zas1024/gn2-zach/DO_genotypes/SNP_Map.txt" +gn_geno_path = "/home/zas1024/gn2-zach/DO_genotypes/DOL.geno" + +# Iterate through the SNP_Map.txt file to get marker positions +marker_data = {} +with open(markers_file, "r") as markers_fh: + for i, line in enumerate(markers_fh): + if i == 0: + continue + else: + line_items = line.split("\t") + this_marker = {} + this_marker['chr'] = line_items[2] if line_items[2] != "0" else "M" + this_marker['pos'] = f'{float(line_items[3])/1000000:.6f}' + marker_data[line_items[1]] = this_marker + +# Iterate through R/qtl2 format genotype files and pull out the samplelist and genotypes for each marker +sample_names = [] +for filename in os.listdir(geno_dir): + if "gm4qtl2_geno" in filename: + with open(geno_dir + "/" + filename, "r") as rqtl_geno_fh: + for i, line in enumerate(rqtl_geno_fh): + line_items = line.split(",") + if i < 3: + continue + elif not len(sample_names) and i == 3: + sample_names = [item.replace("TLB", "TB") for item in line_items[1:]] + elif i > 3: + marker_data[line_items[0]]['genotypes'] = ["X" if item.strip() == "-" else item.strip() for item in line_items[1:]] + +# Generate list of marker obs to iterate through when writing to .geno file +marker_list = [] +for key, value in marker_data.items(): + if 'genotypes' in value: + this_marker = { + 'chr': value['chr'], + 'locus': key, + 'pos': value['pos'], + 'genotypes': value['genotypes'] + } + marker_list.append(this_marker) + +def sort_func(e): + """For 
ensuring that X/Y chromosomes/mitochondria are sorted to the end correctly""" + try: + return float((e['chr']))*1000 + float(e['pos']) + except: + if e['chr'] == "X": + return 20000 + float(e['pos']) + elif e['chr'] == "Y": + return 21000 + float(e['pos']) + elif e['chr'] == "M": + return 22000 + float(e['pos']) + +# Sort markers by chromosome +marker_list.sort(key=sort_func) + +# Write lines to .geno file +with open(gn_geno_path, "w") as gn_geno_fh: + gn_geno_fh.write("\t".join((["Chr", "Locus", "cM", "Mb"] + sample_names))) + for marker in marker_list: + row_contents = [ + marker['chr'], + marker['locus'], + marker['pos'], + marker['pos'] + ] + marker['genotypes'] + gn_geno_fh.write("\t".join(row_contents) + "\n") -- cgit v1.2.3 From e473a210491620477898ba69f33f69f14fdf5893 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 8 Oct 2021 22:49:20 +0000 Subject: Fix issue where outliers weren't being highlighted for rows drawn by Scroller (so all rows beyond a certain point) --- wqflask/wqflask/static/new/javascript/initialize_show_trait_tables.js | 2 ++ wqflask/wqflask/templates/show_trait.html | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wqflask/wqflask/static/new/javascript/initialize_show_trait_tables.js b/wqflask/wqflask/static/new/javascript/initialize_show_trait_tables.js index 4de1b0ac..0a060cdc 100644 --- a/wqflask/wqflask/static/new/javascript/initialize_show_trait_tables.js +++ b/wqflask/wqflask/static/new/javascript/initialize_show_trait_tables.js @@ -130,6 +130,7 @@ var primary_table = $('#samples_primary').DataTable( { $(row).addClass("value_se"); if (data.outlier) { $(row).addClass("outlier"); + $(row).attr("style", "background-color: orange;"); } $('td', row).eq(1).addClass("column_name-Index") $('td', row).eq(2).addClass("column_name-Sample") @@ -189,6 +190,7 @@ if (js_data.sample_lists.length > 1){ $(row).addClass("value_se"); if (data.outlier) { $(row).addClass("outlier"); + $(row).attr("style", "background-color: orange;"); } 
$('td', row).eq(1).addClass("column_name-Index") $('td', row).eq(2).addClass("column_name-Sample") diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 3dbf5f57..f3fa1332 100644 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -254,8 +254,6 @@ } ); {% endif %} - $('#samples_primary, #samples_other').find("tr.outlier").css('background-color', 'orange') - $('.edit_sample_checkbox:checkbox').change(function() { if ($(this).is(":checked")) { if (!$(this).closest('tr').hasClass('selected')) { -- cgit v1.2.3 From 9b28d111ad156f3862286e88bc220e02d5e1312b Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 8 Oct 2021 23:31:19 +0000 Subject: Fixed some issues with scale and score_type in mapping export + include mapping method in export filename --- wqflask/wqflask/marker_regression/run_mapping.py | 25 ++++++++++++------------ 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index 290c4a14..80094057 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -104,7 +104,7 @@ class RunMapping: if "results_path" in start_vars: self.mapping_results_path = start_vars['results_path'] else: - mapping_results_filename = "_".join([self.dataset.group.name, self.vals_hash]).replace("/", "_") + mapping_results_filename = "_".join([self.dataset.group.name, self.mapping_method, self.vals_hash]).replace("/", "_") self.mapping_results_path = "{}{}.csv".format( webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename) @@ -405,8 +405,8 @@ class RunMapping: total_markers = len(self.qtl_results) with Bench("Exporting Results"): - export_mapping_results(self.dataset, self.this_trait, self.qtl_results, - self.mapping_results_path, self.mapping_scale, self.score_type, + export_mapping_results(self.dataset, self.this_trait, 
self.qtl_results, self.mapping_results_path, + self.mapping_method, self.mapping_scale, self.score_type, self.transform, self.covariates, self.n_samples, self.vals_hash) with Bench("Trimming Markers for Figure"): @@ -525,7 +525,11 @@ class RunMapping: return trimmed_genotype_data -def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, score_type, transform, covariates, n_samples, vals_hash): +def export_mapping_results(dataset, trait, markers, results_path, mapping_method, mapping_scale, score_type, transform, covariates, n_samples, vals_hash): + if mapping_scale == "physic": + scale_string = "Mb" + else: + scale_string = "cM" with open(results_path, "w+") as output_file: output_file.write( "Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") @@ -535,6 +539,7 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, output_file.write("Trait: " + trait.display_name + "\n") output_file.write("Trait Hash: " + vals_hash + "\n") output_file.write("N Samples: " + str(n_samples) + "\n") + output_file.write("Mapping Tool: " + str(mapping_method) + "\n") if len(transform) > 0: transform_text = "Transform - " if transform == "qnorm": @@ -564,10 +569,7 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, output_file.write("Name,Chr,") if score_type.lower() == "-logP": score_type = "-logP" - if 'Mb' in markers[0]: - output_file.write("Mb," + score_type) - if 'cM' in markers[0]: - output_file.write("Cm," + score_type) + output_file.write(scale_string + "," + score_type) if "additive" in list(markers[0].keys()): output_file.write(",Additive") if "dominance" in list(markers[0].keys()): @@ -575,11 +577,8 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, output_file.write("\n") for i, marker in enumerate(markers): output_file.write(marker['name'] + "," + str(marker['chr']) + ",") - if 'Mb' in marker: - output_file.write(str(marker['Mb']) + ",") - if 
'cM' in marker: - output_file.write(str(marker['cM']) + ",") - if "lod_score" in marker.keys(): + output_file.write(str(marker[scale_string]) + ",") + if score_type == "-logP": output_file.write(str(marker['lod_score'])) else: output_file.write(str(marker['lrs_value'])) -- cgit v1.2.3 From a212ad123f902b6a9c74bcac1d98bc274cebbdda Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 12 Oct 2021 17:36:02 +0000 Subject: Fixed export_mapping_results test in test_run_mapping.py --- .../wqflask/marker_regression/test_run_mapping.py | 33 +++++++++++----------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py index 3747aeb8..868b0b0b 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py @@ -43,7 +43,7 @@ class TestRunMapping(unittest.TestCase): }) } self.dataset = AttributeSetter( - {"fullname": "dataser_1", "group": self.group, "type": "ProbeSet"}) + {"fullname": "dataset_1", "group": self.group, "type": "ProbeSet"}) self.chromosomes = AttributeSetter({"chromosomes": chromosomes}) self.trait = AttributeSetter( @@ -180,37 +180,36 @@ class TestRunMapping(unittest.TestCase): with mock.patch("wqflask.marker_regression.run_mapping.datetime.datetime", new=datetime_mock): export_mapping_results(dataset=self.dataset, trait=self.trait, markers=markers, - results_path="~/results", mapping_scale="physic", score_type="-log(p)", - transform="qnorm", covariates="Dataset1:Trait1,Dataset2:Trait2", n_samples="100", - vals_hash="") + results_path="~/results", mapping_method="gemma", mapping_scale="physic", + score_type="-logP", transform="qnorm", + covariates="Dataset1:Trait1,Dataset2:Trait2", + n_samples="100", vals_hash="") write_calls = [ mock.call('Time/Date: 09/01/19 / 10:12:12\n'), mock.call('Population: Human GP1_\n'), mock.call( - 'Data Set: 
dataser_1\n'), + 'Data Set: dataset_1\n'), mock.call('Trait: Test Name\n'), mock.call('Trait Hash: \n'), - mock.call('N Samples: 100\n'), mock.call( - 'Transform - Quantile Normalized\n'), + mock.call('N Samples: 100\n'), + mock.call('Mapping Tool: gemma\n'), + mock.call('Transform - Quantile Normalized\n'), mock.call('Gene Symbol: IGFI\n'), mock.call( 'Location: X1 @ 123313 Mb\n'), mock.call('Cofactors (dataset - trait):\n'), mock.call('Trait1 - Dataset1\n'), mock.call('Trait2 - Dataset2\n'), mock.call('\n'), mock.call('Name,Chr,'), - mock.call('Mb,-log(p)'), mock.call('Cm,-log(p)'), + mock.call('Mb,-logP'), mock.call(',Additive'), mock.call(',Dominance'), mock.call('\n'), mock.call('MK1,C1,'), - mock.call('12000,'), mock.call('1,'), - mock.call('3'), mock.call(',VA'), - mock.call(',TT'), mock.call('\n'), - mock.call('MK2,C2,'), mock.call('10000,'), - mock.call('15,'), mock.call('7'), + mock.call('12000,'), mock.call('3'), + mock.call(',VA'), mock.call(',TT'), + mock.call('\n'), mock.call('MK2,C2,'), + mock.call('10000,'), mock.call('7'), mock.call('\n'), mock.call('MK1,C3,'), - mock.call('1,'), mock.call('45,'), - mock.call('7'), mock.call(',VE'), - mock.call(',Tt') - + mock.call('1,'), mock.call('7'), + mock.call(',VE'), mock.call(',Tt') ] mock_open.assert_called_once_with("~/results", "w+") filehandler = mock_open() -- cgit v1.2.3