diff options
author | BonfaceKilz | 2021-02-24 10:27:02 +0300 |
---|---|---|
committer | BonfaceKilz | 2021-02-24 14:20:29 +0300 |
commit | 06f480b625fe5a240ddcdf3e6887f0796cfefb52 (patch) | |
tree | b58a8b03be9a7ce4e22f1f55106b2e9f668760a6 | |
parent | 06250b830c8cc26b66354363935c3b9b06a6e7cc (diff) | |
download | genenetwork3-06f480b625fe5a240ddcdf3e6887f0796cfefb52.tar.gz |
Add new procedure that computes the hash of an array of strings
-rw-r--r-- | gn3/computations/gemma.py | 7 | ||||
-rw-r--r-- | tests/unit/computations/test_gemma.py | 13 |
2 files changed, 17 insertions, 3 deletions
diff --git a/gn3/computations/gemma.py b/gn3/computations/gemma.py index 790aecb..ea0f86a 100644 --- a/gn3/computations/gemma.py +++ b/gn3/computations/gemma.py @@ -2,6 +2,8 @@ import random import string +from base64 import b64encode +from hashlib import md5 def generate_random_n_string(n_length: int) -> str: """Generate a random string that is N chars long""" @@ -9,6 +11,11 @@ def generate_random_n_string(n_length: int) -> str: for _ in range(n_length)) +def generate_hash_of_string(unhashed_str: str) -> str: + """Given an UNHASHED_STRING, generate its md5 hash while removing the '==' at +the end""" + hashed_str = md5(unhashed_str.encode("utf-8")).digest() + return b64encode(hashed_str).decode("utf-8").replace("==", "") def generate_pheno_txt_file(trait_filename: str, values: str, tmpdir: str = "/tmp") -> str: diff --git a/tests/unit/computations/test_gemma.py b/tests/unit/computations/test_gemma.py index 89c8d00..23707ef 100644 --- a/tests/unit/computations/test_gemma.py +++ b/tests/unit/computations/test_gemma.py @@ -2,12 +2,12 @@ import unittest from unittest import mock +from gn3.computations.gemma import generate_hash_of_string from gn3.computations.gemma import generate_pheno_txt_file class TestGemma(unittest.TestCase): """Test cases for computations.gemma module""" - def test_generate_pheno_txt_file(self): """Test that the pheno text file is generated correctly""" open_mock = mock.mock_open() @@ -16,9 +16,16 @@ class TestGemma(unittest.TestCase): _file = generate_pheno_txt_file(tmpdir="/tmp", trait_filename="phenotype.txt", values=["x", "x", "BXD07 438.700"]) - self.assertEqual(_file, "/tmp/gn2/phenotype.txt") - open_mock.assert_called_with("/tmp/gn2/phenotype.txt", "w") + self.assertEqual(_file, ("/tmp/gn2/phenotype_" + "P7y6QWnwBPedSZdL0+m/GQ.txt")) + open_mock.assert_called_with(("/tmp/gn2/phenotype_" + "P7y6QWnwBPedSZdL0+m/GQ.txt"), "w") open_mock.return_value.write.assert_has_calls([ mock.call("NA\n"), mock.call("NA\n"), mock.call("BXD07 438.700\n")]) 
+ + def test_generate_hash_of_string(self): + """Test that a string is hashed correctly""" + self.assertEqual(generate_hash_of_string("I^iQP&TlSR^z"), + "hMVRw8kbEp49rOmoIkhMjA") |