From 4b80bbfe261e8d0587a637b35416834e027f0999 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 20 Apr 2016 08:46:23 +0000 Subject: [PATCH 024/100] Sanitizing file handling --- wqflask/base/data_set.py | 29 ++++++++++------------------- wqflask/utility/tools.py | 20 +++++++++++++++----- wqflask/wqflask/show_trait/show_trait.py | 11 ++++------- 3 files changed, 29 insertions(+), 31 deletions(-) (limited to 'wqflask') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 0e5a3ac1..c6f34143 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -44,12 +44,15 @@ from dbFunction import webqtlDatabaseFunction from utility import webqtlUtil from utility.benchmark import Bench from utility import chunks +from utility.tools import flat_files from maintenance import get_group_samplelists from MySQLdb import escape_string as escape from pprint import pformat as pf +MAPPING_PATH = flat_files("mapping") + # Used by create_database to instantiate objects # Each subclass will add to this DS_NAME_MAP = {} @@ -404,15 +407,11 @@ class DatasetGroup(object): else: #print("Cache not hit") - from utility.tools import plink_command - PLINK_RUN = plink_command() - geno_file_path = webqtlConfig.GENODIR+self.name+".geno" - plink_file_path = PLINK_PATH+"/"+self.name+".fam" - # @FIXME PJOTR/ZACH: .fam files should go into FLATFILES - if os.path.isfile(plink_file_path): - self.samplelist = get_group_samplelists.get_samplelist("plink", plink_file_path) + mapping_file_path = MAPPING_PATH+"/"+self.name+".fam" + if os.path.isfile(mapping_file_path): + self.samplelist = get_group_samplelists.get_samplelist("plink", mapping_file_path) elif os.path.isfile(geno_file_path): self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path) else: @@ -441,18 +440,10 @@ class DatasetGroup(object): # reaper barfs on unicode filenames, so here we ensure it's a string full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno')) - if os.path.isfile(full_filename): - #print("Reading file: ", full_filename) - genotype_1.read(full_filename) - #print("File read") - else: - try: - full_filename = str(os.path.join(webqtlConfig.TMPDIR, self.name + '.geno')) - #print("Reading file") - genotype_1.read(full_filename) - #print("File read") - except IOError: - print("File doesn't exist!") + if not os.path.isfile(full_filename): + raise SystemError("File "+full_filename+" does not exist") + print("Reading file: ", full_filename) + genotype_1.read(full_filename) if genotype_1.type == "group" and self.parlist: genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1) diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py index c3c9b292..44bbb1b0 100644 --- a/wqflask/utility/tools.py +++ b/wqflask/utility/tools.py @@ -52,17 +52,27 @@ def get_setting(command_id,guess=None): raise Exception(command_id+' path unknown or faulty (update settings.py?). '+command_id+' should point to the path') return command +def valid_bin(bin): + if os.path.islink(bin) or os.path.isfile(bin): + return bin + return None + +def valid_path(dir): + if os.path.isdir(dir): + return dir + return None + def pylmm_command(guess=None): - return get_setting("PYLMM_RUN",guess) + return valid_bin(get_setting("PYLMM_RUN",guess)) def gemma_command(guess=None): - return get_setting("GEMMA_RUN",guess) + return valid_bin(get_setting("GEMMA_RUN",guess)) def plink_command(guess=None): - return get_setting("PLINK_RUN",guess) + return valid_bin(get_setting("PLINK_RUN",guess)) def flat_files(subdir=None): base = get_setting("GENENETWORK_FILES") if subdir: - return base+"/"+subdir - return base + return valid_path(base+"/"+subdir) + return valid_path(base) diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 458e48da..35f7fe5f 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -16,8 +16,6 @@ from base import webqtlConfig from base import webqtlCaseData from wqflask.show_trait.SampleList import SampleList from utility import webqtlUtil, Plot, Bunch, helper_functions -# from utility.tools import plink_command -from utility.tools import flat_files from base.trait import GeneralTrait from base import data_set from dbFunction import webqtlDatabaseFunction @@ -25,7 +23,8 @@ from basicStatistics import BasicStatisticsFunctions from pprint import pformat as pf -MAPPING_FILES = flat_files("mapping") +from utility.tools import flat_files +MAPPING_PATH = flat_files("mapping") ############################################### # @@ -34,8 +33,6 @@ MAPPING_FILES = flat_files("mapping") # ############################################## - - class ShowTrait(object): def __init__(self, kw): @@ -162,8 +159,8 @@ class ShowTrait(object): def get_mapping_methods(self): '''Only display mapping methods when the dataset group's genotype file exists''' def check_plink_gemma(): - if (os.path.isfile(MAPPYING_FILES+"/"+self.dataset.group.name+".bed") and - os.path.isfile(MAPPING_FILES+"/"+self.dataset.group.name+".map")): + if (os.path.isfile(MAPPYING_PATH+"/"+self.dataset.group.name+".bed") and + os.path.isfile(MAPPING_PATH+"/"+self.dataset.group.name+".map")): return True else: return False -- cgit v1.2.3