aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBonfaceKilz2021-02-24 10:27:02 +0300
committerBonfaceKilz2021-02-24 14:20:29 +0300
commit06f480b625fe5a240ddcdf3e6887f0796cfefb52 (patch)
treeb58a8b03be9a7ce4e22f1f55106b2e9f668760a6
parent06250b830c8cc26b66354363935c3b9b06a6e7cc (diff)
downloadgenenetwork3-06f480b625fe5a240ddcdf3e6887f0796cfefb52.tar.gz
Add new procedure that computes the hash of an array of strings
-rw-r--r--gn3/computations/gemma.py7
-rw-r--r--tests/unit/computations/test_gemma.py13
2 files changed, 17 insertions, 3 deletions
diff --git a/gn3/computations/gemma.py b/gn3/computations/gemma.py
index 790aecb..ea0f86a 100644
--- a/gn3/computations/gemma.py
+++ b/gn3/computations/gemma.py
@@ -2,6 +2,8 @@
import random
import string
+from base64 import b64encode
+from hashlib import md5
def generate_random_n_string(n_length: int) -> str:
"""Generate a random string that is N chars long"""
@@ -9,6 +11,11 @@ def generate_random_n_string(n_length: int) -> str:
for _ in range(n_length))
+def generate_hash_of_string(unhashed_str: str) -> str:
+ """Given UNHASHED_STR, return its base64-encoded md5 digest without the
+trailing '==' padding"""
+ hashed_str = md5(unhashed_str.encode("utf-8")).digest()
+ return b64encode(hashed_str).decode("utf-8").replace("==", "")
def generate_pheno_txt_file(trait_filename: str,
values: str,
tmpdir: str = "/tmp") -> str:
diff --git a/tests/unit/computations/test_gemma.py b/tests/unit/computations/test_gemma.py
index 89c8d00..23707ef 100644
--- a/tests/unit/computations/test_gemma.py
+++ b/tests/unit/computations/test_gemma.py
@@ -2,12 +2,12 @@
import unittest
from unittest import mock
+from gn3.computations.gemma import generate_hash_of_string
from gn3.computations.gemma import generate_pheno_txt_file
class TestGemma(unittest.TestCase):
"""Test cases for computations.gemma module"""
-
def test_generate_pheno_txt_file(self):
"""Test that the pheno text file is generated correctly"""
open_mock = mock.mock_open()
@@ -16,9 +16,16 @@ class TestGemma(unittest.TestCase):
_file = generate_pheno_txt_file(tmpdir="/tmp",
trait_filename="phenotype.txt",
values=["x", "x", "BXD07 438.700"])
- self.assertEqual(_file, "/tmp/gn2/phenotype.txt")
- open_mock.assert_called_with("/tmp/gn2/phenotype.txt", "w")
+ self.assertEqual(_file, ("/tmp/gn2/phenotype_"
+ "P7y6QWnwBPedSZdL0+m/GQ.txt"))
+ open_mock.assert_called_with(("/tmp/gn2/phenotype_"
+ "P7y6QWnwBPedSZdL0+m/GQ.txt"), "w")
open_mock.return_value.write.assert_has_calls([
mock.call("NA\n"),
mock.call("NA\n"),
mock.call("BXD07 438.700\n")])
+
+ def test_generate_hash_of_string(self):
+ """Test that a string is hashed correctly"""
+ self.assertEqual(generate_hash_of_string("I^iQP&TlSR^z"),
+ "hMVRw8kbEp49rOmoIkhMjA")