about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Arun Isaac, 2022-02-24 13:46:34 +0530
committer: Arun Isaac, 2022-02-24 14:04:06 +0530
commit: aaff8b8ac968bce9821d6fef22b1296247a9df09 (patch)
tree: 87e7442b6f4114a64adeced052e3d3835b93a610
parent: 393700a432e25336c5afd1f008152202853f5bc2 (diff)
download: genenetwork3-aaff8b8ac968bce9821d6fef22b1296247a9df09.tar.gz
gn3: Explicitly specify UTF-8 to be the file encoding.
When the encoding is not specified explicitly, the system default encoding is used. This is not recommended.

* gn3/computations/ctl.py (call_ctl_script), gn3/computations/gemma.py (generate_pheno_txt_file), gn3/computations/parsers.py (parse_genofile), gn3/computations/partial_correlations.py (partial_correlations_fast), gn3/computations/rqtl.py (process_rqtl_output, process_perm_output), gn3/computations/wgcna.py (dump_wgcna_data, call_wgcna_script), gn3/fs_helpers.py (jsonfile_to_dict): Explicitly specify UTF-8 to be the file encoding.
* tests/unit/computations/test_gemma.py (TestGemma.test_generate_pheno_txt_file), tests/unit/computations/test_wgcna.py (TestWgcna.test_create_json_file): Test for call to open with encoding='utf-8' argument.
-rw-r--r-- gn3/computations/ctl.py | 2
-rw-r--r-- gn3/computations/gemma.py | 2
-rw-r--r-- gn3/computations/parsers.py | 2
-rw-r--r-- gn3/computations/partial_correlations.py | 2
-rw-r--r-- gn3/computations/rqtl.py | 4
-rw-r--r-- gn3/computations/wgcna.py | 4
-rw-r--r-- gn3/fs_helpers.py | 2
-rw-r--r-- guix-system.scm | 15
-rw-r--r-- tests/unit/computations/test_gemma.py | 2
-rw-r--r-- tests/unit/computations/test_wgcna.py | 2
10 files changed, 19 insertions, 18 deletions
diff --git a/gn3/computations/ctl.py b/gn3/computations/ctl.py
index 238740f..f881410 100644
--- a/gn3/computations/ctl.py
+++ b/gn3/computations/ctl.py
@@ -16,7 +16,7 @@ def call_ctl_script(data):
cmd = compose_wgcna_cmd("ctl_analysis.R", temp_file_name)
cmd_results = run_cmd(cmd)
- with open(temp_file_name, "r") as outputfile:
+ with open(temp_file_name, "r", encoding="utf-8") as outputfile:
if cmd_results["code"] != 0:
return (cmd_results, None)
output_file_data = json.load(outputfile)
diff --git a/gn3/computations/gemma.py b/gn3/computations/gemma.py
index 0b22d3c..8036a7b 100644
--- a/gn3/computations/gemma.py
+++ b/gn3/computations/gemma.py
@@ -31,7 +31,7 @@ def generate_pheno_txt_file(trait_filename: str,
# Early return if this already exists!
if os.path.isfile(f"{tmpdir}/gn2/{trait_filename}"):
return f"{tmpdir}/gn2/{trait_filename}"
- with open(f"{tmpdir}/gn2/{trait_filename}", "w") as _file:
+ with open(f"{tmpdir}/gn2/{trait_filename}", "w", encoding="utf-8") as _file:
for value in values:
if value == "x":
_file.write("NA\n")
diff --git a/gn3/computations/parsers.py b/gn3/computations/parsers.py
index 1af35d6..79e3955 100644
--- a/gn3/computations/parsers.py
+++ b/gn3/computations/parsers.py
@@ -15,7 +15,7 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
'u': None,
}
genotypes, samples = [], []
- with open(file_path, "r") as _genofile:
+ with open(file_path, "r", encoding="utf-8") as _genofile:
for line in _genofile:
line = line.strip()
if line.startswith(("#", "@")):
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 7110cc5..e826a8b 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -241,7 +241,7 @@ def partial_correlations_fast(# pylint: disable=[R0913, R0914]
function in GeneNetwork1.
"""
assert method in ("spearman", "pearson")
- with open(database_filename, "r") as dataset_file: # pytest: disable=[W1514]
+ with open(database_filename, "r", encoding="utf-8") as dataset_file: # pytest: disable=[W1514]
dataset = tuple(dataset_file.readlines())
good_dataset_samples = good_dataset_samples_indexes(
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 0433b3f..b3539a9 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -56,7 +56,7 @@ def process_rqtl_output(file_name: str) -> List:
# Later I should probably redo this using csv.read to avoid the
# awkwardness with removing quotes with [1:-1]
with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
- "output", file_name), "r") as the_file:
+ "output", file_name), "r", encoding="utf-8") as the_file:
for line in the_file:
line_items = line.split(",")
if line_items[1][1:-1] == "chr" or not line_items:
@@ -88,7 +88,7 @@ def process_perm_output(file_name: str):
"""
perm_results = []
with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
- "output", "PERM_" + file_name), "r") as the_file:
+ "output", "PERM_" + file_name), "r", encoding="utf-8") as the_file:
for i, line in enumerate(the_file):
if i == 0:
# Skip header line
diff --git a/gn3/computations/wgcna.py b/gn3/computations/wgcna.py
index ab12fe7..de26f48 100644
--- a/gn3/computations/wgcna.py
+++ b/gn3/computations/wgcna.py
@@ -19,7 +19,7 @@ def dump_wgcna_data(request_data: dict):
request_data["TMPDIR"] = TMPDIR
- with open(temp_file_path, "w") as output_file:
+ with open(temp_file_path, "w", encoding="utf-8") as output_file:
json.dump(request_data, output_file)
return temp_file_path
@@ -75,7 +75,7 @@ def call_wgcna_script(rscript_path: str, request_data: dict):
run_cmd_results = run_cmd(cmd)
- with open(generated_file, "r") as outputfile:
+ with open(generated_file, "r", encoding="utf-8") as outputfile:
if run_cmd_results["code"] != 0:
return run_cmd_results
diff --git a/gn3/fs_helpers.py b/gn3/fs_helpers.py
index 73f6567..578269b 100644
--- a/gn3/fs_helpers.py
+++ b/gn3/fs_helpers.py
@@ -41,7 +41,7 @@ def get_dir_hash(directory: str) -> str:
def jsonfile_to_dict(json_file: str) -> Dict:
"""Give a JSON_FILE, return a python dict"""
- with open(json_file) as _file:
+ with open(json_file, encoding="utf-8") as _file:
data = json.load(_file)
return data
raise FileNotFoundError
diff --git a/guix-system.scm b/guix-system.scm
index c154d01..7142cea 100644
--- a/guix-system.scm
+++ b/guix-system.scm
@@ -111,10 +111,11 @@ function."
%base-file-systems))
(users %base-user-accounts)
(packages %base-packages)
- (services (cons* (service virtuoso-service-type
- (virtuoso-configuration
- (http-server-port 8891)))
- (service genenetwork3-service-type
- (genenetwork3-configuration
- (port 5000)))
- %base-services)))
+ (services (cons*
+ ;; (service virtuoso-service-type
+ ;; (virtuoso-configuration
+ ;; (http-server-port 8891)))
+ (service genenetwork3-service-type
+ (genenetwork3-configuration
+ (port 5000)))
+ %base-services)))
diff --git a/tests/unit/computations/test_gemma.py b/tests/unit/computations/test_gemma.py
index b36a93e..137c95c 100644
--- a/tests/unit/computations/test_gemma.py
+++ b/tests/unit/computations/test_gemma.py
@@ -22,7 +22,7 @@ class TestGemma(unittest.TestCase):
self.assertEqual(_file, ("/tmp/gn2/phenotype_"
"P7y6QWnwBPedSZdL0+m/GQ.txt"))
open_mock.assert_called_with(("/tmp/gn2/phenotype_"
- "P7y6QWnwBPedSZdL0+m/GQ.txt"), "w")
+ "P7y6QWnwBPedSZdL0+m/GQ.txt"), "w", encoding="utf-8")
open_mock.return_value.write.assert_has_calls([
mock.call("NA\n"),
mock.call("NA\n"),
diff --git a/tests/unit/computations/test_wgcna.py b/tests/unit/computations/test_wgcna.py
index 3130374..a9108b0 100644
--- a/tests/unit/computations/test_wgcna.py
+++ b/tests/unit/computations/test_wgcna.py
@@ -166,7 +166,7 @@ class TestWgcna(TestCase):
expected_input)
file_handler.assert_called_once_with(
- "/tmp/facb73ff-7eef-4053-b6ea-e91d3a22a00c.json", 'w')
+ "/tmp/facb73ff-7eef-4053-b6ea-e91d3a22a00c.json", 'w', encoding='utf-8')
self.assertEqual(
results, "/tmp/facb73ff-7eef-4053-b6ea-e91d3a22a00c.json")