diff options
author | zsloan | 2016-05-17 16:03:05 +0000 |
---|---|---|
committer | zsloan | 2016-05-17 16:03:05 +0000 |
commit | 0d590051ce073a169295382e5adac34cc803e5ac (patch) | |
tree | 76bea6f1f69288a498e9f13d20b638b942ab0a0c /wqflask/utility | |
parent | f6cee1731772c3482b0afd4c77472d69e747c655 (diff) | |
parent | 04afa563e6d53fe2a91ac2e6eb4af2f2fa5d5c3b (diff) | |
download | genenetwork2-0d590051ce073a169295382e5adac34cc803e5ac.tar.gz |
Merge branch 'staging' of github.com:genenetwork/genenetwork2
Diffstat (limited to 'wqflask/utility')
-rw-r--r-- | wqflask/utility/external.py | 9 | ||||
-rw-r--r-- | wqflask/utility/genofile_parser.py | 100 | ||||
-rw-r--r-- | wqflask/utility/tools.py | 189 |
3 files changed, 230 insertions, 68 deletions
diff --git a/wqflask/utility/external.py b/wqflask/utility/external.py new file mode 100644 index 00000000..50afea08 --- /dev/null +++ b/wqflask/utility/external.py @@ -0,0 +1,9 @@ +# Call external program + +import os +import sys +import subprocess + +def shell(command): + if subprocess.call(command, shell=True) != 0: + raise Exception("ERROR: failed on "+command) diff --git a/wqflask/utility/genofile_parser.py b/wqflask/utility/genofile_parser.py new file mode 100644 index 00000000..67b84dc9 --- /dev/null +++ b/wqflask/utility/genofile_parser.py @@ -0,0 +1,100 @@ +# CTL analysis for GN2 +# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com> + +from __future__ import print_function, division, absolute_import +import sys +import os +import glob +import traceback +import gzip + + +import simplejson as json + +from pprint import pformat as pf + +class Marker(object): + def __init__(self): + self.name = None + self.chr = None + self.cM = None + self.Mb = None + self.genotypes = [] + + +class ConvertGenoFile(object): + + def __init__(self, input_file): + self.mb_exists = False + self.cm_exists = False + self.markers = [] + + self.latest_row_pos = None + self.latest_col_pos = None + + self.latest_row_value = None + self.latest_col_value = None + self.input_fh = open(input_file) + print("!!!!!!!!!!!!!!!!PARSER!!!!!!!!!!!!!!!!!!") + self.haplotype_notation = { + '@mat': "1", + '@pat': "2", + '@het': "-999", + '@unk': "-999" + } + self.configurations = {} + + def process_rows(self): + for self.latest_row_pos, row in enumerate(self.input_fh): + self.latest_row_value = row + # Take care of headers + if not row.strip(): + continue + if row.startswith('#'): + continue + if row.startswith('Chr'): + if 'Mb' in row.split(): + self.mb_exists = True + if 'cM' in row.split(): + self.cm_exists = True + skip = 2 + self.cm_exists + self.mb_exists + self.individuals = row.split()[skip:] + continue + if row.startswith('@'): + key, _separater, value = row.partition(':') + key = key.strip() + value = value.strip() + if key in self.haplotype_notation: + self.configurations[value] = self.haplotype_notation[key] + continue + if not len(self.configurations): + raise EmptyConfigurations + yield row + + def process_csv(self): + for row_count, row in enumerate(self.process_rows()): + row_items = row.split("\t") + + this_marker = Marker() + this_marker.name = row_items[1] + this_marker.chr = row_items[0] + if self.cm_exists and self.mb_exists: + this_marker.cM = row_items[2] + this_marker.Mb = row_items[3] + genotypes = row_items[4:] + elif self.cm_exists: + this_marker.cM = row_items[2] + genotypes = row_items[3:] + elif self.mb_exists: + this_marker.Mb = row_items[2] + genotypes = row_items[3:] + else: + genotypes = row_items[2:] + for item_count, genotype in enumerate(genotypes): + if genotype.upper().strip() in self.configurations: + this_marker.genotypes.append(self.configurations[genotype.upper().strip()]) + else: + print("WARNING:", genotype.upper()) + this_marker.genotypes.append("NA") + self.markers.append(this_marker.__dict__) + diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py index b8a41f60..dd8c4a1e 100644 --- a/wqflask/utility/tools.py +++ b/wqflask/utility/tools.py @@ -1,84 +1,137 @@ # Tools/paths finder resolves external paths from settings and/or environment # variables -# -# Currently supported: -# -# PYLMM_PATH finds the root of the git repository of the pylmm_gn2 tool import os import sys from wqflask import app -def get_setting(id,default,guess,get_valid_path): - """ - Resolve a setting from the environment or the global settings in app.config +def get_setting(command_id,guess=None): + """Resolve a setting from the environment or the global settings in + app.config, with get_valid_path is a function checking whether the + path points to an expected directory and returns the full path to + the binary command + + guess = os.environ.get('HOME')+'/pylmm' + get_setting('PYLMM_PATH',guess) + + first tries the environment variable in +id+, next gets the Flask + app setting for the same +id+ and finally does an educated + +guess+. + + In all, the environment overrides the others, next is the flask + setting, then the guess. A valid path to the binary command is + returned. If none is resolved an exception is thrown. + + Note that we do not use the system path. This is on purpose + because it will mess up controlled (reproducible) deployment. The + proper way is to either use the GNU Guix defaults as listed in + etc/default_settings.py or override them yourself by creating a + different settings.py file (or setting the environment). + """ + def value(command): + if command: + sys.stderr.write("Found path "+command+"\n") + return command + else: + return None + # ---- Check whether environment exists - path = get_valid_path(os.environ.get(id)) - # ---- Check whether setting exists - setting = app.config.get(id) - if not path: - path = get_valid_path(setting) - # ---- Check whether default exists - if not path: - path = get_valid_path(default) - # ---- Guess directory - if not path: - if not setting: - setting = guess - path = get_valid_path(guess) - if not path: - raise Exception(id+' '+setting+' path unknown or faulty (update settings.py?). '+id+' should point to the root of the git repository') - - return path - -def pylmm_command(default=None): + sys.stderr.write("Looking for "+command_id+"\n") + command = value(os.environ.get(command_id)) + if not command: + # ---- Check whether setting exists in app + command = value(app.config.get(command_id)) + if not command: + command = value(guess) + if not command: + raise Exception(command_id+' path unknown or faulty (update settings.py?). '+command_id+' should point to the path') + return command + +def valid_bin(bin): + if os.path.islink(bin) or valid_file(bin): + return bin + return None + +def valid_file(fn): + if os.path.isfile(fn): + return fn + return None + +def valid_path(dir): + if os.path.isdir(dir): + return dir + return None + +def pylmm_command(guess=None): + return valid_bin(get_setting("PYLMM_COMMAND",guess)) + +def gemma_command(guess=None): + return valid_bin(get_setting("GEMMA_COMMAND",guess)) + +def plink_command(guess=None): + return valid_bin(get_setting("PLINK_COMMAND",guess)) + +def flat_files(subdir=None): + base = get_setting("GENENETWORK_FILES") + if subdir: + return assert_dir(base+"/"+subdir) + return assert_dir(base) + +def assert_dir(dir): + if not valid_path(dir): + raise Exception("ERROR: can not find directory "+dir) + return dir + +def mk_dir(dir): + if not valid_path(dir): + os.makedirs(dir) + return assert_dir(dir) + +def locate(name, subdir=None): """ - Return the path to the repository and the python command to call + Locate a static flat file in the GENENETWORK_FILES environment. + + This function throws an error when the file is not found. """ - def get_valid_path(path): - """Test for a valid repository""" - if path: - sys.stderr.write("Trying PYLMM_PATH in "+path+"\n") - if path and os.path.isfile(path+'/pylmm_gn2/lmm.py'): - return path + base = get_setting("GENENETWORK_FILES") + if subdir: + base = base+"/"+subdir + if valid_path(base): + lookfor = base + "/" + name + if valid_file(lookfor): + print("Found: file "+lookfor+"\n") + return lookfor else: - None + raise Exception("Can not locate "+lookfor) + if subdir: sys.stderr.write(subdir) + raise Exception("Can not locate "+name+" in "+base) - guess = os.environ.get('HOME')+'/pylmm_gn2' - path = get_setting('PYLMM_PATH',default,guess,get_valid_path) - pylmm_command = 'python '+path+'/pylmm_gn2/lmm.py' - return path,pylmm_command - -def plink_command(default=None): +def locate_ignore_error(name, subdir=None): """ - Return the path to the repository and the python command to call + Locate a static flat file in the GENENETWORK_FILES environment. + + This function does not throw an error when the file is not found + but returns None. """ - def get_valid_path(path): - """Test for a valid repository""" - if path: - sys.stderr.write("Trying PLINK_PATH in "+path+"\n") - if path and os.path.isfile(path+'/plink'): - return path - else: - None - - guess = os.environ.get('HOME')+'/plink_gemma' - path = get_setting('PLINK_PATH',default,guess,get_valid_path) - plink_command = path+'/plink' - return path,plink_command - -def gemma_command(default=None): - def get_valid_path(path): - """Test for a valid repository""" - if path: - sys.stderr.write("Trying PLINK_PATH in "+path+"\n") - if path and os.path.isfile(path+'/plink'): - return path - else: - None + base = get_setting("GENENETWORK_FILES") + if subdir: + base = base+"/"+subdir + if valid_path(base): + lookfor = base + "/" + name + if valid_file(lookfor): + print("Found: file "+name+"\n") + return lookfor + sys.stderr.write("WARNING: file "+name+" not found\n") + return None + +def tempdir(): + return valid_path(get_setting("TEMPDIR","/tmp")) - guess = os.environ.get('HOME')+'/plink' - path = get_setting('PLINK_PATH',default,guess,get_valid_path) - gemma_command = path+'/gemma' - return path, gemma_command
\ No newline at end of file + +# Cached values +PYLMM_COMMAND = pylmm_command() +GEMMA_COMMAND = gemma_command() +PLINK_COMMAND = plink_command() +FLAT_FILES = flat_files() +TEMPDIR = tempdir() |