From 06f480b625fe5a240ddcdf3e6887f0796cfefb52 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Wed, 24 Feb 2021 10:27:02 +0300 Subject: Add new procedure that computes the hash of an array of strings --- gn3/computations/gemma.py | 7 +++++++ tests/unit/computations/test_gemma.py | 13 ++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/gn3/computations/gemma.py b/gn3/computations/gemma.py index 790aecb..ea0f86a 100644 --- a/gn3/computations/gemma.py +++ b/gn3/computations/gemma.py @@ -2,6 +2,8 @@ import random import string +from base64 import b64encode +from hashlib import md5 def generate_random_n_string(n_length: int) -> str: """Generate a random string that is N chars long""" @@ -9,6 +11,11 @@ def generate_random_n_string(n_length: int) -> str: for _ in range(n_length)) +def generate_hash_of_string(unhashed_str: str) -> str: + """Given an UNHASHED_STR, generate its md5 hash while removing the '==' at +the end""" + hashed_str = md5(unhashed_str.encode("utf-8")).digest() + return b64encode(hashed_str).decode("utf-8").replace("==", "") def generate_pheno_txt_file(trait_filename: str, values: str, tmpdir: str = "/tmp") -> str: diff --git a/tests/unit/computations/test_gemma.py b/tests/unit/computations/test_gemma.py index 89c8d00..23707ef 100644 --- a/tests/unit/computations/test_gemma.py +++ b/tests/unit/computations/test_gemma.py @@ -2,12 +2,12 @@ import unittest from unittest import mock +from gn3.computations.gemma import generate_hash_of_string from gn3.computations.gemma import generate_pheno_txt_file class TestGemma(unittest.TestCase): """Test cases for computations.gemma module""" - def test_generate_pheno_txt_file(self): """Test that the pheno text file is generated correctly""" open_mock = mock.mock_open() @@ -16,9 +16,16 @@ class TestGemma(unittest.TestCase): _file = generate_pheno_txt_file(tmpdir="/tmp", trait_filename="phenotype.txt", values=["x", "x", "BXD07 438.700"]) - self.assertEqual(_file, "/tmp/gn2/phenotype.txt") - 
open_mock.assert_called_with("/tmp/gn2/phenotype.txt", "w") + self.assertEqual(_file, ("/tmp/gn2/phenotype_" + "P7y6QWnwBPedSZdL0+m/GQ.txt")) + open_mock.assert_called_with(("/tmp/gn2/phenotype_" + "P7y6QWnwBPedSZdL0+m/GQ.txt"), "w") open_mock.return_value.write.assert_has_calls([ mock.call("NA\n"), mock.call("NA\n"), mock.call("BXD07 438.700\n")]) + + def test_generate_hash_of_string(self): + """Test that a string is hashed correctly""" + self.assertEqual(generate_hash_of_string("I^iQP&TlSR^z"), + "hMVRw8kbEp49rOmoIkhMjA") -- cgit v1.2.3