From aaff8b8ac968bce9821d6fef22b1296247a9df09 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Thu, 24 Feb 2022 13:46:34 +0530 Subject: gn3: Explicitly specify UTF-8 to be the file encoding. When the encoding is not specified explicitly, the system default encoding is used. This is not recommended. * gn3/computations/ctl.py (call_ctl_script), gn3/computations/gemma.py (generate_pheno_txt_file), gn3/computations/parsers.py (parse_genofile), gn3/computations/partial_correlations.py (partial_correlations_fast), gn3/computations/rqtl.py (process_rqtl_output, process_perm_output), gn3/computations/wgcna.py (dump_wgcna_data, call_wgcna_script), gn3/fs_helpers.py (jsonfile_to_dict): Explicitly specify UTF-8 to be the file encoding. * tests/unit/computations/test_gemma.py (TestGemma.test_generate_pheno_txt_file), tests/unit/computations/test_wgcna.py (TestWgcna.test_create_json_file): Test for call to open with encoding='utf-8' argument. --- gn3/computations/ctl.py | 2 +- gn3/computations/gemma.py | 2 +- gn3/computations/parsers.py | 2 +- gn3/computations/partial_correlations.py | 2 +- gn3/computations/rqtl.py | 4 ++-- gn3/computations/wgcna.py | 4 ++-- gn3/fs_helpers.py | 2 +- guix-system.scm | 15 ++++++++------- tests/unit/computations/test_gemma.py | 2 +- tests/unit/computations/test_wgcna.py | 2 +- 10 files changed, 19 insertions(+), 18 deletions(-) diff --git a/gn3/computations/ctl.py b/gn3/computations/ctl.py index 238740f..f881410 100644 --- a/gn3/computations/ctl.py +++ b/gn3/computations/ctl.py @@ -16,7 +16,7 @@ def call_ctl_script(data): cmd = compose_wgcna_cmd("ctl_analysis.R", temp_file_name) cmd_results = run_cmd(cmd) - with open(temp_file_name, "r") as outputfile: + with open(temp_file_name, "r", encoding="utf-8") as outputfile: if cmd_results["code"] != 0: return (cmd_results, None) output_file_data = json.load(outputfile) diff --git a/gn3/computations/gemma.py b/gn3/computations/gemma.py index 0b22d3c..8036a7b 100644 --- a/gn3/computations/gemma.py +++ b/gn3/computations/gemma.py @@ -31,7 +31,7 @@ def generate_pheno_txt_file(trait_filename: str, # Early return if this already exists! if os.path.isfile(f"{tmpdir}/gn2/{trait_filename}"): return f"{tmpdir}/gn2/{trait_filename}" - with open(f"{tmpdir}/gn2/{trait_filename}", "w") as _file: + with open(f"{tmpdir}/gn2/{trait_filename}", "w", encoding="utf-8") as _file: for value in values: if value == "x": _file.write("NA\n") diff --git a/gn3/computations/parsers.py b/gn3/computations/parsers.py index 1af35d6..79e3955 100644 --- a/gn3/computations/parsers.py +++ b/gn3/computations/parsers.py @@ -15,7 +15,7 @@ def parse_genofile(file_path: str) -> Tuple[List[str], 'u': None, } genotypes, samples = [], [] - with open(file_path, "r") as _genofile: + with open(file_path, "r", encoding="utf-8") as _genofile: for line in _genofile: line = line.strip() if line.startswith(("#", "@")): diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 7110cc5..e826a8b 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -241,7 +241,7 @@ def partial_correlations_fast(# pylint: disable=[R0913, R0914] function in GeneNetwork1. """ assert method in ("spearman", "pearson") - with open(database_filename, "r") as dataset_file: # pytest: disable=[W1514] + with open(database_filename, "r", encoding="utf-8") as dataset_file: # pytest: disable=[W1514] dataset = tuple(dataset_file.readlines()) good_dataset_samples = good_dataset_samples_indexes( diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 0433b3f..b3539a9 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -56,7 +56,7 @@ def process_rqtl_output(file_name: str) -> List: # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), - "output", file_name), "r") as the_file: + "output", file_name), "r", encoding="utf-8") as the_file: for line in the_file: line_items = line.split(",") if line_items[1][1:-1] == "chr" or not line_items: @@ -88,7 +88,7 @@ def process_perm_output(file_name: str): """ perm_results = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), - "output", "PERM_" + file_name), "r") as the_file: + "output", "PERM_" + file_name), "r", encoding="utf-8") as the_file: for i, line in enumerate(the_file): if i == 0: # Skip header line diff --git a/gn3/computations/wgcna.py b/gn3/computations/wgcna.py index ab12fe7..de26f48 100644 --- a/gn3/computations/wgcna.py +++ b/gn3/computations/wgcna.py @@ -19,7 +19,7 @@ def dump_wgcna_data(request_data: dict): request_data["TMPDIR"] = TMPDIR - with open(temp_file_path, "w") as output_file: + with open(temp_file_path, "w", encoding="utf-8") as output_file: json.dump(request_data, output_file) return temp_file_path @@ -75,7 +75,7 @@ def call_wgcna_script(rscript_path: str, request_data: dict): run_cmd_results = run_cmd(cmd) - with open(generated_file, "r") as outputfile: + with open(generated_file, "r", encoding="utf-8") as outputfile: if run_cmd_results["code"] != 0: return run_cmd_results diff --git a/gn3/fs_helpers.py b/gn3/fs_helpers.py index 73f6567..578269b 100644 --- a/gn3/fs_helpers.py +++ b/gn3/fs_helpers.py @@ -41,7 +41,7 @@ def get_dir_hash(directory: str) -> str: def jsonfile_to_dict(json_file: str) -> Dict: """Give a JSON_FILE, return a python dict""" - with open(json_file) as _file: + with open(json_file, encoding="utf-8") as _file: data = json.load(_file) return data raise FileNotFoundError diff --git a/guix-system.scm b/guix-system.scm index c154d01..7142cea 100644 --- a/guix-system.scm +++ b/guix-system.scm @@ -111,10 +111,11 @@ function." %base-file-systems)) (users %base-user-accounts) (packages %base-packages) - (services (cons* (service virtuoso-service-type - (virtuoso-configuration - (http-server-port 8891))) - (service genenetwork3-service-type - (genenetwork3-configuration - (port 5000))) - %base-services))) + (services (cons* + ;; (service virtuoso-service-type + ;; (virtuoso-configuration + ;; (http-server-port 8891))) + (service genenetwork3-service-type + (genenetwork3-configuration + (port 5000))) + %base-services))) diff --git a/tests/unit/computations/test_gemma.py b/tests/unit/computations/test_gemma.py index b36a93e..137c95c 100644 --- a/tests/unit/computations/test_gemma.py +++ b/tests/unit/computations/test_gemma.py @@ -22,7 +22,7 @@ class TestGemma(unittest.TestCase): self.assertEqual(_file, ("/tmp/gn2/phenotype_" "P7y6QWnwBPedSZdL0+m/GQ.txt")) open_mock.assert_called_with(("/tmp/gn2/phenotype_" - "P7y6QWnwBPedSZdL0+m/GQ.txt"), "w") + "P7y6QWnwBPedSZdL0+m/GQ.txt"), "w", encoding="utf-8") open_mock.return_value.write.assert_has_calls([ mock.call("NA\n"), mock.call("NA\n"), diff --git a/tests/unit/computations/test_wgcna.py b/tests/unit/computations/test_wgcna.py index 3130374..a9108b0 100644 --- a/tests/unit/computations/test_wgcna.py +++ b/tests/unit/computations/test_wgcna.py @@ -166,7 +166,7 @@ class TestWgcna(TestCase): expected_input) file_handler.assert_called_once_with( - "/tmp/facb73ff-7eef-4053-b6ea-e91d3a22a00c.json", 'w') + "/tmp/facb73ff-7eef-4053-b6ea-e91d3a22a00c.json", 'w', encoding='utf-8') self.assertEqual( results, "/tmp/facb73ff-7eef-4053-b6ea-e91d3a22a00c.json") -- cgit v1.2.3