"""Procedures related gemma computations""" import errno import os from base64 import b64encode from hashlib import md5 from typing import Optional, Dict from typing import List from typing import ValuesView from gn3.commands import compose_gemma_cmd from gn3.fs_helpers import get_hash_of_files, assert_paths_exist def generate_hash_of_string(unhashed_str: str) -> str: """Given an UNHASHED_STRING, generate it's md5 hash while removing the '==' at the end""" hashed_str = md5(unhashed_str.encode("utf-8")).digest() return b64encode(hashed_str).decode("utf-8").replace("==", "") def generate_pheno_txt_file(trait_filename: str, values: List, tmpdir: str = "/tmp") -> str: """Given VALUES, and TMPDIR, generate a valid traits file""" if not os.path.isdir(f"{tmpdir}/gn3/"): os.mkdir(f"{tmpdir}/gn3/") ext = trait_filename.partition(".")[-1] if ext: trait_filename = trait_filename.replace(f".{ext}", "") ext = f".{ext}" trait_filename += f"_{generate_hash_of_string(''.join(values))}{ext}" # Early return if this already exists! if os.path.isfile(f"{tmpdir}/gn3/{trait_filename}"): return f"{tmpdir}/gn3/{trait_filename}" with open(f"{tmpdir}/gn3/{trait_filename}", "w", encoding="utf-8") as _file: for value in values: if value == "x": _file.write("NA\n") else: _file.write(f"{value}\n") return f"{tmpdir}/gn3/{trait_filename}" # pylint: disable=R0913 def generate_gemma_cmd(gemma_cmd: str, output_dir: str, token: str, gemma_kwargs: Dict, gemma_wrapper_kwargs: Optional[Dict] = None, chromosomes: Optional[str] = None) -> Dict: """Compute k values""" _hash = get_hash_of_files( [v for k, v in gemma_kwargs.items() if k in ["g", "p", "a", "c"]]) if chromosomes: # Only reached when calculating k-values gemma_wrapper_kwargs = {"loco": f"{chromosomes}"} _hash += f"-{generate_hash_of_string(chromosomes)[:6]}" _output_filename = f"{_hash}-output.json" return { "output_file": _output_filename, "gemma_cmd": compose_gemma_cmd(gemma_wrapper_cmd=gemma_cmd, gemma_wrapper_kwargs=gemma_wrapper_kwargs, gemma_kwargs=gemma_kwargs, gemma_args=[ "-gk", ">", (f"{output_dir}/" f"{token}/{_output_filename}") ]) }