From aaff8b8ac968bce9821d6fef22b1296247a9df09 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Thu, 24 Feb 2022 13:46:34 +0530 Subject: gn3: Explicitly specify UTF-8 to be the file encoding. When the encoding is not specified explicitly, the system default encoding is used. This is not recommended. * gn3/computations/ctl.py (call_ctl_script), gn3/computations/gemma.py (generate_pheno_txt_file), gn3/computations/parsers.py (parse_genofile), gn3/computations/partial_correlations.py (partial_correlations_fast), gn3/computations/rqtl.py (process_rqtl_output, process_perm_output), gn3/computations/wgcna.py (dump_wgcna_data, call_wgcna_script), gn3/fs_helpers.py (jsonfile_to_dict): Explicitly specify UTF-8 to be the file encoding. * tests/unit/computations/test_gemma.py (TestGemma.test_generate_pheno_txt_file), tests/unit/computations/test_wgcna.py (TestWgcna.test_create_json_file): Test for call to open with encoding='utf-8' argument. --- gn3/computations/ctl.py | 2 +- gn3/computations/gemma.py | 2 +- gn3/computations/parsers.py | 2 +- gn3/computations/partial_correlations.py | 2 +- gn3/computations/rqtl.py | 4 ++-- gn3/computations/wgcna.py | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'gn3/computations') diff --git a/gn3/computations/ctl.py b/gn3/computations/ctl.py index 238740f..f881410 100644 --- a/gn3/computations/ctl.py +++ b/gn3/computations/ctl.py @@ -16,7 +16,7 @@ def call_ctl_script(data): cmd = compose_wgcna_cmd("ctl_analysis.R", temp_file_name) cmd_results = run_cmd(cmd) - with open(temp_file_name, "r") as outputfile: + with open(temp_file_name, "r", encoding="utf-8") as outputfile: if cmd_results["code"] != 0: return (cmd_results, None) output_file_data = json.load(outputfile) diff --git a/gn3/computations/gemma.py b/gn3/computations/gemma.py index 0b22d3c..8036a7b 100644 --- a/gn3/computations/gemma.py +++ b/gn3/computations/gemma.py @@ -31,7 +31,7 @@ def generate_pheno_txt_file(trait_filename: str, # Early return if this already exists! if os.path.isfile(f"{tmpdir}/gn2/{trait_filename}"): return f"{tmpdir}/gn2/{trait_filename}" - with open(f"{tmpdir}/gn2/{trait_filename}", "w") as _file: + with open(f"{tmpdir}/gn2/{trait_filename}", "w", encoding="utf-8") as _file: for value in values: if value == "x": _file.write("NA\n") diff --git a/gn3/computations/parsers.py b/gn3/computations/parsers.py index 1af35d6..79e3955 100644 --- a/gn3/computations/parsers.py +++ b/gn3/computations/parsers.py @@ -15,7 +15,7 @@ def parse_genofile(file_path: str) -> Tuple[List[str], 'u': None, } genotypes, samples = [], [] - with open(file_path, "r") as _genofile: + with open(file_path, "r", encoding="utf-8") as _genofile: for line in _genofile: line = line.strip() if line.startswith(("#", "@")): diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 7110cc5..e826a8b 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -241,7 +241,7 @@ def partial_correlations_fast(# pylint: disable=[R0913, R0914] function in GeneNetwork1. """ assert method in ("spearman", "pearson") - with open(database_filename, "r") as dataset_file: # pytest: disable=[W1514] + with open(database_filename, "r", encoding="utf-8") as dataset_file: # pytest: disable=[W1514] dataset = tuple(dataset_file.readlines()) good_dataset_samples = good_dataset_samples_indexes( diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py index 0433b3f..b3539a9 100644 --- a/gn3/computations/rqtl.py +++ b/gn3/computations/rqtl.py @@ -56,7 +56,7 @@ def process_rqtl_output(file_name: str) -> List: # Later I should probably redo this using csv.read to avoid the # awkwardness with removing quotes with [1:-1] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), - "output", file_name), "r") as the_file: + "output", file_name), "r", encoding="utf-8") as the_file: for line in the_file: line_items = line.split(",") if line_items[1][1:-1] == "chr" or not line_items: @@ -88,7 +88,7 @@ def process_perm_output(file_name: str): """ perm_results = [] with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), - "output", "PERM_" + file_name), "r") as the_file: + "output", "PERM_" + file_name), "r", encoding="utf-8") as the_file: for i, line in enumerate(the_file): if i == 0: # Skip header line diff --git a/gn3/computations/wgcna.py b/gn3/computations/wgcna.py index ab12fe7..de26f48 100644 --- a/gn3/computations/wgcna.py +++ b/gn3/computations/wgcna.py @@ -19,7 +19,7 @@ def dump_wgcna_data(request_data: dict): request_data["TMPDIR"] = TMPDIR - with open(temp_file_path, "w") as output_file: + with open(temp_file_path, "w", encoding="utf-8") as output_file: json.dump(request_data, output_file) return temp_file_path @@ -75,7 +75,7 @@ def call_wgcna_script(rscript_path: str, request_data: dict): run_cmd_results = run_cmd(cmd) - with open(generated_file, "r") as outputfile: + with open(generated_file, "r", encoding="utf-8") as outputfile: if run_cmd_results["code"] != 0: return run_cmd_results -- cgit v1.2.3