-rwxr-xr-x | bin/genenetwork2                                        |  14
-rwxr-xr-x | bin/test-website                                        |  12
-rw-r--r-- | etc/default_settings.py                                 |   5
-rw-r--r-- | test/lib/gntest.rb                                      |   1
-rw-r--r-- | test/lib/mapping.rb                                     |   1
-rw-r--r-- | wqflask/base/webqtlConfig.py                            |  22
-rw-r--r-- | wqflask/utility/tools.py                                |  15
-rw-r--r-- | wqflask/wqflask/collect.py                              |  60
-rw-r--r-- | wqflask/wqflask/marker_regression/marker_regression.py | 343
-rw-r--r-- | wqflask/wqflask/marker_regression/plink_mapping.py     | 161
-rw-r--r-- | wqflask/wqflask/marker_regression/qtlreaper_mapping.py |  91
-rw-r--r-- | wqflask/wqflask/marker_regression/rqtl_mapping.py      | 384
-rw-r--r-- | wqflask/wqflask/templates/base.html                    |  28
-rw-r--r-- | wqflask/wqflask/templates/ctl_results.html             |   2
-rw-r--r-- | wqflask/wqflask/templates/ctl_setup.html               |   2
-rw-r--r-- | wqflask/wqflask/templates/error.html                   |   4
-rwxr-xr-x | wqflask/wqflask/templates/index_page_orig.html         |  64
-rw-r--r-- | wqflask/wqflask/user_manager.py                        |   1
-rw-r--r-- | wqflask/wqflask/views.py                               |  12
19 files changed, 616 insertions, 606 deletions
diff --git a/bin/genenetwork2 b/bin/genenetwork2 index d7d1c325..52d3155c 100755 --- a/bin/genenetwork2 +++ b/bin/genenetwork2 @@ -26,6 +26,10 @@ GN2_BASE_DIR=$(dirname $(dirname "$SCRIPT")) echo GN2_BASE_DIR=$GN2_BASE_DIR +GIT_HASH=$(git rev-parse HEAD) +GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) +export GN_VERSION=$(cat $GN2_BASE_DIR/VERSION)-$GIT_BRANCH-${GIT_HASH:0:9} +echo GN_VERSION=$GN_VERSION # Handle settings parameter (can be .py or .json) settings=$1 @@ -79,13 +83,13 @@ fi # We may change this one: export PYTHONPATH=$GN2_BASE_DIR/wqflask:$PYTHONPATH -# TEMPDIR defaults to /tmp if nothing else -if [ -z $TEMPDIR ]; then - TEMPDIR="/tmp" +# Our UNIX TMPDIR defaults to /tmp - change this on a shared server +if [ -z $TMPDIR ]; then + TMPDIR="/tmp" fi set|grep $GN2_PROFILE -set|grep TEMPDIR +set|grep TMPDIR # Now handle command parameter -c if [ "$1" = '-c' ] ; then @@ -98,7 +102,7 @@ if [ "$1" = '-c' ] ; then fi echo "Starting the redis server:" -echo -n "dir $TEMPDIR +echo -n "dir $TMPDIR dbfilename gn2.rdb " | redis-server - & diff --git a/bin/test-website b/bin/test-website index c9a72a5e..be223d94 100755 --- a/bin/test-website +++ b/bin/test-website @@ -52,12 +52,12 @@ opts = OptionParser.new do |o| options[:link_checker] = true end - o.on('--navigation-test', 'Check for navigation') do - options[:navigation_test] = true + o.on('--navigation', 'Check for navigation') do + options[:navigation] = true end - o.on('--mapping-test', 'Check for mapping') do - options[:mapping_test] = true + o.on('--mapping', 'Check for mapping') do + options[:mapping] = true end o.on('--skip-broken', 'Skip tests that are known to be broken') do @@ -98,7 +98,7 @@ $: << File.join(libpath,'test/lib') require 'main_web_functionality' -if options[:all] or options[:mapping_test] +if options[:all] or options[:mapping] require 'mapping' end @@ -106,6 +106,6 @@ if options[:all] or options[:link_checker] require 'link_checker' end -if options[:all] or options[:navigation_test] +if options[:all] or options[:navigation] require 'navigation' end diff --git a/etc/default_settings.py b/etc/default_settings.py index d8e57f38..6acea637 100644 --- a/etc/default_settings.py +++ b/etc/default_settings.py @@ -18,6 +18,7 @@ import os import sys +GN_VERSION = open("../VERSION","r").read() SQL_URI = "mysql://gn2:mysql_password@localhost/db_webqtl_s" SQLALCHEMY_DATABASE_URI = 'mysql://gn2:mysql_password@localhost/db_webqtl_s' SQLALCHEMY_POOL_RECYCLE = 3600 @@ -50,6 +51,10 @@ LOG_BENCH = True # Log bench marks USE_REDIS = True # REDIS caching (note that redis will be phased out) USE_GN_SERVER = 'False' # Use GN_SERVER SQL calls +# Paths to JS libraries + +TWITTER_POST_FETCHER_JS_PATH = os.environ['HOME']+"/genenetwork/Twitter-Post-Fetcher" + # ---- Path overrides for Genenetwork # TMPDIR is normally picked up from the environment HOME=os.environ['HOME'] diff --git a/test/lib/gntest.rb b/test/lib/gntest.rb index 865ef51d..5b21b3d5 100644 --- a/test/lib/gntest.rb +++ b/test/lib/gntest.rb @@ -1,5 +1,6 @@ require 'minitest/autorun' require 'mechanize' +require 'json' # ---- Use some default parameters if not set $host = "http://localhost:5003" if !$host diff --git a/test/lib/mapping.rb b/test/lib/mapping.rb index d6a3cd7b..20e5bd40 100644 --- a/test/lib/mapping.rb +++ b/test/lib/mapping.rb @@ -36,6 +36,7 @@ describe MappingTest do json, ({'Content-Type' => 'application/x-www-form-urlencoded'})) form = page.forms_with("marker_regression")[0] + p form form.fields.select { |fld| fld.name == 'dataset' }.first.value.must_equal 
'HC_M2_0606_P' form.fields.select { |fld| fld.name == 'value:BXD1' }.first.value.must_equal "15.034" end diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 6bbabdec..e5f10edf 100644 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -8,7 +8,7 @@ # ######################################### -from utility.tools import valid_path, mk_dir, assert_dir, flat_files, TEMPDIR +from utility.tools import valid_path, mk_dir, assert_dir, assert_writable_dir, flat_files, TEMPDIR #Debug Level #1 for debug, mod python will reload import each time @@ -60,24 +60,36 @@ ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Lucene/Details?sp # HTMLPATH is replaced by GENODIR # IMGDIR is replaced by GENERATED_IMAGE_DIR -# Temporary storage (note that this TMPDIR is not the same directory -# as the UNIX TMPDIR) +# Temporary storage (note that this TMPDIR can be set as an +# environment variable - use utility.tools.TEMPDIR when you +# want to reach this base dir +assert_writable_dir(TEMPDIR) + TMPDIR = mk_dir(TEMPDIR+'/gn2/') +assert_writable_dir(TMPDIR) + CACHEDIR = mk_dir(TMPDIR+'/cache/') # We can no longer write into the git tree: GENERATED_IMAGE_DIR = mk_dir(TMPDIR+'/generated/') GENERATED_TEXT_DIR = mk_dir(TMPDIR+'/generated_text/') +# Make sure we have permissions to access these +assert_writable_dir(CACHEDIR) +assert_writable_dir(GENERATED_IMAGE_DIR) +assert_writable_dir(GENERATED_TEXT_DIR) + # Flat file directories GENODIR = flat_files('genotype')+'/' +assert_dir(GENODIR) + +# JSON genotypes are OBSOLETE JSON_GENODIR = flat_files('genotype/json')+'/' if not valid_path(JSON_GENODIR): # fall back on old location (move the dir, FIXME) JSON_GENODIR = flat_files('json') -assert_dir(GENODIR) +# Are we using the following...? 
PORTADDR = "http://50.16.251.170" - INFOPAGEHREF = '/dbdoc/%s.html' CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR' SCRIPTFILE = 'main.py' diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py index 23d6fb62..df032e48 100644 --- a/wqflask/utility/tools.py +++ b/wqflask/utility/tools.py @@ -113,6 +113,16 @@ def assert_dir(dir): raise Exception("ERROR: can not find directory "+dir) return dir +def assert_writable_dir(dir): + try: + fn = dir + "/test.txt" + fh = open( fn, 'w' ) + fh.write("I am writing this text to the file\n") + fh.close() + os.remove(fn) + except IOError: + raise Exception('Unable to write test.txt to directory ' + dir ) + return dir def mk_dir(dir): if not valid_path(dir): @@ -187,6 +197,7 @@ def show_settings(): # Cached values +GN_VERSION = get_setting('GN_VERSION') HOME = get_setting('HOME') WEBSERVER_MODE = get_setting('WEBSERVER_MODE') GN_SERVER_URL = get_setting('GN_SERVER_URL') @@ -205,7 +216,7 @@ GENENETWORK_FILES = get_setting('GENENETWORK_FILES') PYLMM_COMMAND = pylmm_command() GEMMA_COMMAND = gemma_command() PLINK_COMMAND = plink_command() -TEMPDIR = tempdir() +TEMPDIR = tempdir() # defaults to UNIX TMPDIR from six import string_types @@ -221,3 +232,5 @@ if os.environ.get('WQFLASK_OVERRIDES'): else: OVERRIDES[k] = cmd logger.debug(OVERRIDES) + +assert_dir(get_setting("TWITTER_POST_FETCHER_JS_PATH")) diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py index 81d03d6c..70ae2a1c 100644 --- a/wqflask/wqflask/collect.py +++ b/wqflask/wqflask/collect.py @@ -55,7 +55,7 @@ class AnonCollection(object): #ZS: Find id and set it if the collection doesn't already exist if Redis.get(self.key) == "None" or Redis.get(self.key) == None: - Redis.set(self.key, None) #ZS: For some reason I get the error "Operation against a key holding the wrong kind of value" if I don't do this + Redis.set(self.key, None) #ZS: For some reason I get the error "Operation against a key holding the wrong kind of value" if I don't do this else: collections_list = json.loads(Redis.get(self.key)) collection_position = 0 #ZS: Position of collection in collection_list, if it exists @@ -66,9 +66,10 @@ class AnonCollection(object): collection_exists = True self.id = collection['id'] break + if self.id == None: self.id = str(uuid.uuid4()) - + def get_members(self): traits = [] collections_list = json.loads(Redis.get(self.key)) @@ -76,16 +77,16 @@ class AnonCollection(object): if collection['id'] == self.id: traits = collection['members'] return traits - + @property def num_members(self): - num_members = 0 + num_members = 0 collections_list = json.loads(Redis.get(self.key)) for collection in collections_list: if collection['id'] == self.id: num_members = collection['num_members'] return num_members - + def add_traits(self, params): #assert collection_name == "Default", "Unexpected collection name for anonymous user" self.traits = list(process_traits(params['traits'])) @@ -122,7 +123,7 @@ class AnonCollection(object): "num_members" : len(self.traits), "members" : self.traits} collections_list.append(collection_dict) - + Redis.set(self.key, json.dumps(collections_list)) #Redis.sadd(self.key, *list(traits)) #Redis.expire(self.key, 60 * 60 * 24 * 5) @@ -169,14 +170,14 @@ class UserCollection(object): len_before = len(members) traits = process_traits(params['traits']) - + members_now = members for trait in traits: if trait in members: continue else: members_now.append(trait) - + #members_now = list(members | traits) len_now = len(members_now) 
uc.members = json.dumps(members_now) @@ -191,6 +192,18 @@ class UserCollection(object): # Probably have to change that return redirect(url_for('view_collection', uc_id=uc.id)) +def process_traits(unprocessed_traits): + #print("unprocessed_traits are:", unprocessed_traits) + if isinstance(unprocessed_traits, basestring): + unprocessed_traits = unprocessed_traits.split(",") + traits = set() + for trait in unprocessed_traits: + #print("trait is:", trait) + data, _separator, hmac = trait.rpartition(':') + data = data.strip() + assert hmac==user_manager.actual_hmac_creation(data), "Data tampering?" + traits.add (str(data)) + return traits def report_change(len_before, len_now): new_length = len_now - len_before @@ -224,11 +237,10 @@ def collections_add(): collections = collection_names, ) - @app.route("/collections/new") def collections_new(): params = request.args - + if "sign_in" in params: return redirect(url_for('login')) @@ -248,26 +260,12 @@ def collections_new(): else: CauseAnError - -def process_traits(unprocessed_traits): - #print("unprocessed_traits are:", unprocessed_traits) - if isinstance(unprocessed_traits, basestring): - unprocessed_traits = unprocessed_traits.split(",") - traits = set() - for trait in unprocessed_traits: - #print("trait is:", trait) - data, _separator, hmac = trait.rpartition(':') - data = data.strip() - assert hmac==user_manager.actual_hmac_creation(data), "Data tampering?" - traits.add (str(data)) - return traits - def create_new(collection_name): params = request.args - + unprocessed_traits = params['traits'] traits = process_traits(unprocessed_traits) - + if g.user_session.logged_in: uc = model.UserCollection() uc.name = collection_name @@ -280,7 +278,7 @@ def create_new(collection_name): else: current_collections = user_manager.AnonUser().get_collections() ac = AnonCollection(collection_name) - ac.changed_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + ac.changed_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') ac.add_traits(params) return redirect(url_for('view_collection', collection_id=ac.id)) @@ -345,7 +343,7 @@ def delete_collection(): else: collection_name = params['collection_name'] user_manager.AnonUser().delete_collection(collection_name) - + flash("We've deleted the collection: {}.".format(collection_name), "alert-info") return redirect(url_for('list_collections')) @@ -369,15 +367,15 @@ def view_collection(): break #this_collection = user_collections[params['collection_id']] traits = this_collection['members'] - + print("in view_collection traits are:", traits) trait_obs = [] json_version = [] for atrait in traits: - name, dataset_name = atrait.split(':') - + name, dataset_name = atrait.split(':') + trait_ob = trait.GeneralTrait(name=name, dataset_name=dataset_name) trait_ob.retrieve_info(get_qtl_info=True) trait_obs.append(trait_ob) diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 543eeede..4d622f21 100644 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -35,7 +35,7 @@ from utility import helper_functions from utility import Plot, Bunch from utility import temp_data from utility.benchmark import Bench -from wqflask.marker_regression import gemma_mapping, rqtl_mapping +from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping from utility.tools import locate, locate_ignore_error, PYLMM_COMMAND, GEMMA_COMMAND, 
PLINK_COMMAND, TEMPDIR from utility.external import shell @@ -171,7 +171,7 @@ class MarkerRegression(object): if start_vars['pair_scan'] == "true": self.pair_scan = True if self.permCheck and self.num_perm > 0: - perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(self.vals, self.dataset, self.method, self.model, self.permCheck, self.num_perm, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan) + self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(self.vals, self.dataset, self.method, self.model, self.permCheck, self.num_perm, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan) else: results = rqtl_mapping.run_rqtl_geno(self.vals, self.dataset, self.method, self.model, self.permCheck, self.num_perm, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan) elif self.mapping_method == "reaper": @@ -203,9 +203,19 @@ class MarkerRegression(object): self.do_control = start_vars['do_control'] self.dataset.group.genofile = start_vars['genofile'] logger.info("Running qtlreaper") - results = self.gen_reaper_results() + results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.gen_reaper_results(self.this_trait, + self.dataset, + self.samples, + self.json_data, + self.num_perm, + self.bootCheck, + self.num_bootstrap, + self.do_control, + self.control_marker, + self.manhattan_plot) elif self.mapping_method == "plink": - results = self.run_plink() + results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf) + #results = self.run_plink() elif self.mapping_method == "pylmm": logger.debug("RUNNING PYLMM") self.dataset.group.genofile = start_vars['genofile'] @@ -303,55 +313,12 @@ class MarkerRegression(object): perm_results = self.perm_output, ) - - def run_gemma(self): - """Generates p-values for each marker using GEMMA""" - - self.gen_pheno_txt_file() - - #os.chdir("/home/zas1024/gene/web/gemma") - - gemma_command = GEMMA_COMMAND + ' -bfile %s -k output_%s.cXX.txt -lmm 1 -o %s_output' % ( - self.dataset.group.name, - self.dataset.group.name, - self.dataset.group.name) - #logger.debug("gemma_command:" + gemma_command) - - os.system(gemma_command) - - included_markers, p_values = self.parse_gemma_output() - - self.dataset.group.get_specified_markers(markers = included_markers) - self.dataset.group.markers.add_pvalues(p_values) - return self.dataset.group.markers.markers - - def parse_gemma_output(self): - included_markers = [] - p_values = [] - # Use a temporary file name here! - with open(webqtlConfig.GENERATED_TEXT_DIR+"/{}_output.assoc.txt".format(self.dataset.group.name)) as output_file: - for line in output_file: - if line.startswith("chr"): - continue - else: - included_markers.append(line.split("\t")[1]) - p_values.append(float(line.split("\t")[10])) - #p_values[line.split("\t")[1]] = float(line.split("\t")[10]) - #logger.debug("p_values: ", p_values) - return included_markers, p_values - - def gen_pheno_txt_file(self): - """Generates phenotype file for GEMMA""" - with open(webqtlConfig.GENERATED_TEXT_DIR+"{}.fam".format(self.dataset.group.name), "w") as outfile: - for i, sample in enumerate(self.samples): - outfile.write(str(sample) + " " + str(sample) + " 0 0 0 " + str(self.vals[i]) + "\n") - def run_rqtl_plink(self): # os.chdir("") never do this inside a webserver!! 
output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name)) - self.gen_pheno_txt_file_plink(pheno_filename = output_filename) + plink_mapping.gen_pheno_txt_file_plink(self.this_trait, self.dataset, self.vals, pheno_filename = output_filename) rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) @@ -359,286 +326,6 @@ class MarkerRegression(object): count, p_values = self.parse_rqtl_output(plink_output_filename) - def run_plink(self): - plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name)) - - self.gen_pheno_txt_file_plink(pheno_filename = plink_output_filename) - - plink_command = PLINK_COMMAND + ' --noweb --ped %s/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %s/%s.map --pheno %s%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (PLINK_PATH, self.dataset.group.name, PLINK_PATH, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) - logger.debug("plink_command:", plink_command) - - os.system(plink_command) - - count, p_values = self.parse_plink_output(plink_output_filename) - - #for marker in self.dataset.group.markers.markers: - # if marker['name'] not in included_markers: - # logger.debug("marker:", marker) - # self.dataset.group.markers.markers.remove(marker) - # #del self.dataset.group.markers.markers[marker] - - logger.debug("p_values:", pf(p_values)) - - self.dataset.group.markers.add_pvalues(p_values) - - return self.dataset.group.markers.markers - - - def gen_pheno_txt_file_plink(self, pheno_filename = ''): - ped_sample_list = self.get_samples_from_ped_file() - output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb") - header = 'FID\tIID\t%s\n' % self.this_trait.name - output_file.write(header) - - new_value_list = [] - - #if valueDict does not include some strain, value will be set to -9999 as missing value - for i, sample in enumerate(ped_sample_list): - try: - value = self.vals[i] - value = str(value).replace('value=','') - value = value.strip() - except: - value = -9999 - - new_value_list.append(value) - - - new_line = '' - for i, sample in enumerate(ped_sample_list): - j = i+1 - value = new_value_list[i] - new_line += '%s\t%s\t%s\n'%(sample, sample, value) - - if j%1000 == 0: - output_file.write(newLine) - new_line = '' - - if new_line: - output_file.write(new_line) - - output_file.close() - - def gen_pheno_txt_file_rqtl(self, pheno_filename = ''): - ped_sample_list = self.get_samples_from_ped_file() - output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb") - header = 'FID\tIID\t%s\n' % self.this_trait.name - output_file.write(header) - - new_value_list = [] - - #if valueDict does not include some strain, value will be set to -9999 as missing value - for i, sample in enumerate(ped_sample_list): - try: - value = self.vals[i] - value = str(value).replace('value=','') - value = value.strip() - except: - value = -9999 - - new_value_list.append(value) - - - new_line = '' - for i, sample in enumerate(ped_sample_list): - j = i+1 - value = new_value_list[i] - new_line += '%s\t%s\t%s\n'%(sample, sample, value) - - if j%1000 == 0: - output_file.write(newLine) - new_line = '' - - if new_line: - 
output_file.write(new_line) - - output_file.close() - - # get strain name from ped file in order - def get_samples_from_ped_file(self): - ped_file= open("{}/{}.ped".format(PLINK_PATH, self.dataset.group.name),"r") - line = ped_file.readline() - sample_list=[] - - while line: - lineList = string.split(string.strip(line), '\t') - lineList = map(string.strip, lineList) - - sample_name = lineList[0] - sample_list.append(sample_name) - - line = ped_file.readline() - - return sample_list - - def gen_reaper_results(self): - genotype = self.dataset.group.read_genotype_file() - - if self.manhattan_plot != True: - genotype = genotype.addinterval() - - samples, values, variances, sample_aliases = self.this_trait.export_informative() - - trimmed_samples = [] - trimmed_values = [] - for i in range(0, len(samples)): - #if self.this_trait.data[samples[i]].name2 in self.dataset.group.samplelist: - if self.this_trait.data[samples[i]].name in self.samples: - trimmed_samples.append(samples[i]) - trimmed_values.append(values[i]) - - if self.num_perm < 100: - self.suggestive = 0 - self.significant = 0 - else: - self.perm_output = genotype.permutation(strains = trimmed_samples, trait = trimmed_values, nperm=self.num_perm) - self.suggestive = self.perm_output[int(self.num_perm*0.37-1)] - self.significant = self.perm_output[int(self.num_perm*0.95-1)] - self.highly_significant = self.perm_output[int(self.num_perm*0.99-1)] - - self.json_data['suggestive'] = self.suggestive - self.json_data['significant'] = self.significant - - if self.control_marker != "" and self.do_control == "true": - reaper_results = genotype.regression(strains = trimmed_samples, - trait = trimmed_values, - control = str(self.control_marker)) - if self.bootCheck: - control_geno = [] - control_geno2 = [] - _FIND = 0 - for _chr in genotype: - for _locus in _chr: - if _locus.name == self.control_marker: - control_geno2 = _locus.genotype - _FIND = 1 - break - if _FIND: - break - if control_geno2: - _prgy = list(genotype.prgy) - for _strain in trimmed_samples: - _idx = _prgy.index(_strain) - control_geno.append(control_geno2[_idx]) - - self.bootstrap_results = genotype.bootstrap(strains = trimmed_samples, - trait = trimmed_values, - control = control_geno, - nboot = self.num_bootstrap) - else: - reaper_results = genotype.regression(strains = trimmed_samples, - trait = trimmed_values) - - if self.bootCheck: - self.bootstrap_results = genotype.bootstrap(strains = trimmed_samples, - trait = trimmed_values, - nboot = self.num_bootstrap) - - self.json_data['chr'] = [] - self.json_data['pos'] = [] - self.json_data['lod.hk'] = [] - self.json_data['markernames'] = [] - #if self.additive: - # self.json_data['additive'] = [] - - #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary - qtl_results = [] - for qtl in reaper_results: - reaper_locus = qtl.locus - #ZS: Convert chr to int - converted_chr = reaper_locus.chr - if reaper_locus.chr != "X" and reaper_locus.chr != "X/Y": - converted_chr = int(reaper_locus.chr) - self.json_data['chr'].append(converted_chr) - self.json_data['pos'].append(reaper_locus.Mb) - self.json_data['lod.hk'].append(qtl.lrs) - self.json_data['markernames'].append(reaper_locus.name) - #if self.additive: - # self.json_data['additive'].append(qtl.additive) - locus = {"name":reaper_locus.name, "chr":reaper_locus.chr, "cM":reaper_locus.cM, "Mb":reaper_locus.Mb} - qtl = {"lrs_value": qtl.lrs, "chr":converted_chr, "Mb":reaper_locus.Mb, - "cM":reaper_locus.cM, "name":reaper_locus.name, 
"additive":qtl.additive, "dominance":qtl.dominance} - qtl_results.append(qtl) - - return qtl_results - - - def parse_plink_output(self, output_filename): - plink_results={} - - threshold_p_value = 0.01 - - result_fp = open("%s%s.qassoc"% (TMPDIR, output_filename), "rb") - - header_line = result_fp.readline()# read header line - line = result_fp.readline() - - value_list = [] # initialize value list, this list will include snp, bp and pvalue info - p_value_dict = {} - count = 0 - - while line: - #convert line from str to list - line_list = self.build_line_list(line=line) - - # only keep the records whose chromosome name is in db - if self.species.chromosomes.chromosomes.has_key(int(line_list[0])) and line_list[-1] and line_list[-1].strip()!='NA': - - chr_name = self.species.chromosomes.chromosomes[int(line_list[0])] - snp = line_list[1] - BP = line_list[2] - p_value = float(line_list[-1]) - if threshold_p_value >= 0 and threshold_p_value <= 1: - if p_value < threshold_p_value: - p_value_dict[snp] = float(p_value) - - if plink_results.has_key(chr_name): - value_list = plink_results[chr_name] - - # pvalue range is [0,1] - if threshold_p_value >=0 and threshold_p_value <= 1: - if p_value < threshold_p_value: - value_list.append((snp, BP, p_value)) - count += 1 - - plink_results[chr_name] = value_list - value_list = [] - else: - if threshold_p_value >= 0 and threshold_p_value <= 1: - if p_value < threshold_p_value: - value_list.append((snp, BP, p_value)) - count += 1 - - if value_list: - plink_results[chr_name] = value_list - - value_list=[] - - line = result_fp.readline() - else: - line = result_fp.readline() - - #if p_value_list: - # min_p_value = min(p_value_list) - #else: - # min_p_value = 0 - - return count, p_value_dict - - ###################################################### - # input: line: str,one line read from file - # function: convert line from str to list; - # output: lineList list - ####################################################### - def build_line_list(self, line=None): - - line_list = string.split(string.strip(line),' ')# irregular number of whitespaces between columns - line_list = [item for item in line_list if item <>''] - line_list = map(string.strip, line_list) - - return line_list - - def run_permutations(self, temp_uuid): """Runs permutations and gets significant and suggestive LOD scores""" diff --git a/wqflask/wqflask/marker_regression/plink_mapping.py b/wqflask/wqflask/marker_regression/plink_mapping.py new file mode 100644 index 00000000..b457b9a0 --- /dev/null +++ b/wqflask/wqflask/marker_regression/plink_mapping.py @@ -0,0 +1,161 @@ +import string +import os + +from base.webqtlConfig import TMPDIR +from utility import webqtlUtil +from utility.tools import PLINK_COMMAND + +import utility.logger +logger = utility.logger.getLogger(__name__ ) + +def run_plink(this_trait, dataset, species, vals, maf): + plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(dataset.group.name, this_trait.name)) + + gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = plink_output_filename) + + plink_command = PLINK_COMMAND + ' --noweb --ped %s/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %s/%s.map --pheno %s%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % ( + PLINK_PATH, dataset.group.name, PLINK_PATH, dataset.group.name, + TMPDIR, plink_output_filename, this_trait.name, maf, TMPDIR, + plink_output_filename) + logger.debug("plink_command:", plink_command) + + os.system(plink_command) + + count, p_values = 
parse_plink_output(plink_output_filename, species) + + #for marker in self.dataset.group.markers.markers: + # if marker['name'] not in included_markers: + # logger.debug("marker:", marker) + # self.dataset.group.markers.markers.remove(marker) + # #del self.dataset.group.markers.markers[marker] + + logger.debug("p_values:", pf(p_values)) + dataset.group.markers.add_pvalues(p_values) + + return dataset.group.markers.markers + + +def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): + ped_sample_list = get_samples_from_ped_file(dataset) + output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb") + header = 'FID\tIID\t%s\n' % this_trait.name + output_file.write(header) + + new_value_list = [] + + #if valueDict does not include some strain, value will be set to -9999 as missing value + for i, sample in enumerate(ped_sample_list): + try: + value = vals[i] + value = str(value).replace('value=','') + value = value.strip() + except: + value = -9999 + + new_value_list.append(value) + + new_line = '' + for i, sample in enumerate(ped_sample_list): + j = i+1 + value = new_value_list[i] + new_line += '%s\t%s\t%s\n'%(sample, sample, value) + + if j%1000 == 0: + output_file.write(newLine) + new_line = '' + + if new_line: + output_file.write(new_line) + + output_file.close() + +# get strain name from ped file in order +def get_samples_from_ped_file(dataset): + ped_file= open("{}/{}.ped".format(PLINK_PATH, dataset.group.name),"r") + line = ped_file.readline() + sample_list=[] + + while line: + lineList = string.split(string.strip(line), '\t') + lineList = map(string.strip, lineList) + + sample_name = lineList[0] + sample_list.append(sample_name) + + line = ped_file.readline() + + return sample_list + +def parse_plink_output(output_filename, species): + plink_results={} + + threshold_p_value = 0.01 + + result_fp = open("%s%s.qassoc"% (TMPDIR, output_filename), "rb") + + header_line = result_fp.readline()# read header line + line = result_fp.readline() + + value_list = [] # initialize value list, this list will include snp, bp and pvalue info + p_value_dict = {} + count = 0 + + while line: + #convert line from str to list + line_list = build_line_list(line=line) + + # only keep the records whose chromosome name is in db + if species.chromosomes.chromosomes.has_key(int(line_list[0])) and line_list[-1] and line_list[-1].strip()!='NA': + + chr_name = species.chromosomes.chromosomes[int(line_list[0])] + snp = line_list[1] + BP = line_list[2] + p_value = float(line_list[-1]) + if threshold_p_value >= 0 and threshold_p_value <= 1: + if p_value < threshold_p_value: + p_value_dict[snp] = float(p_value) + + if plink_results.has_key(chr_name): + value_list = plink_results[chr_name] + + # pvalue range is [0,1] + if threshold_p_value >=0 and threshold_p_value <= 1: + if p_value < threshold_p_value: + value_list.append((snp, BP, p_value)) + count += 1 + + plink_results[chr_name] = value_list + value_list = [] + else: + if threshold_p_value >= 0 and threshold_p_value <= 1: + if p_value < threshold_p_value: + value_list.append((snp, BP, p_value)) + count += 1 + + if value_list: + plink_results[chr_name] = value_list + + value_list=[] + + line = result_fp.readline() + else: + line = result_fp.readline() + + #if p_value_list: + # min_p_value = min(p_value_list) + #else: + # min_p_value = 0 + + return count, p_value_dict + +###################################################### +# input: line: str,one line read from file +# function: convert line from str to list; +# output: lineList list 
+####################################################### +def build_line_list(line=None): + line_list = string.split(string.strip(line),' ')# irregular number of whitespaces between columns + line_list = [item for item in line_list if item <>''] + line_list = map(string.strip, line_list) + + return line_list diff --git a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py new file mode 100644 index 00000000..50228b5e --- /dev/null +++ b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py @@ -0,0 +1,91 @@ +def gen_reaper_results(this_trait, dataset, samples_before, json_data, num_perm, bootCheck, num_bootstrap, do_control, control_marker, manhattan_plot): + genotype = dataset.group.read_genotype_file() + + if manhattan_plot != True: + genotype = genotype.addinterval() + + samples, values, variances, sample_aliases = this_trait.export_informative() + + trimmed_samples = [] + trimmed_values = [] + for i in range(0, len(samples)): + if this_trait.data[samples[i]].name in samples_before: + trimmed_samples.append(samples[i]) + trimmed_values.append(values[i]) + + perm_output = [] + bootstrap_results = [] + + if num_perm < 100: + suggestive = 0 + significant = 0 + else: + perm_output = genotype.permutation(strains = trimmed_samples, trait = trimmed_values, nperm=num_perm) + suggestive = perm_output[int(num_perm*0.37-1)] + significant = perm_output[int(num_perm*0.95-1)] + highly_significant = perm_output[int(num_perm*0.99-1)] + + json_data['suggestive'] = suggestive + json_data['significant'] = significant + + if control_marker != "" and do_control == "true": + reaper_results = genotype.regression(strains = trimmed_samples, + trait = trimmed_values, + control = str(control_marker)) + if bootCheck: + control_geno = [] + control_geno2 = [] + _FIND = 0 + for _chr in genotype: + for _locus in _chr: + if _locus.name == control_marker: + control_geno2 = _locus.genotype + _FIND = 1 + break + if _FIND: + break + if control_geno2: + _prgy = list(genotype.prgy) + for _strain in trimmed_samples: + _idx = _prgy.index(_strain) + control_geno.append(control_geno2[_idx]) + + bootstrap_results = genotype.bootstrap(strains = trimmed_samples, + trait = trimmed_values, + control = control_geno, + nboot = num_bootstrap) + else: + reaper_results = genotype.regression(strains = trimmed_samples, + trait = trimmed_values) + + if bootCheck: + bootstrap_results = genotype.bootstrap(strains = trimmed_samples, + trait = trimmed_values, + nboot = num_bootstrap) + + json_data['chr'] = [] + json_data['pos'] = [] + json_data['lod.hk'] = [] + json_data['markernames'] = [] + #if self.additive: + # self.json_data['additive'] = [] + + #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary + qtl_results = [] + for qtl in reaper_results: + reaper_locus = qtl.locus + #ZS: Convert chr to int + converted_chr = reaper_locus.chr + if reaper_locus.chr != "X" and reaper_locus.chr != "X/Y": + converted_chr = int(reaper_locus.chr) + json_data['chr'].append(converted_chr) + json_data['pos'].append(reaper_locus.Mb) + json_data['lod.hk'].append(qtl.lrs) + json_data['markernames'].append(reaper_locus.name) + #if self.additive: + # self.json_data['additive'].append(qtl.additive) + locus = {"name":reaper_locus.name, "chr":reaper_locus.chr, "cM":reaper_locus.cM, "Mb":reaper_locus.Mb} + qtl = {"lrs_value": qtl.lrs, "chr":converted_chr, "Mb":reaper_locus.Mb, + "cM":reaper_locus.cM, "name":reaper_locus.name, "additive":qtl.additive, 
"dominance":qtl.dominance} + qtl_results.append(qtl) + return qtl_results, json_data, perm_output, suggestive, significant, bootstrap_results diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index 93bf717c..f3694f0b 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -1,193 +1,191 @@ -import rpy2.robjects as ro
-import numpy as np
-
-from base.webqtlConfig import TMPDIR
-from utility import webqtlUtil
-from utility.tools import locate, TEMPDIR
-
-def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan):
- geno_to_rqtl_function(dataset)
-
- ## Get pointers to some common R functions
- r_library = ro.r["library"] # Map the library function
- r_c = ro.r["c"] # Map the c function
- r_sum = ro.r["sum"] # Map the sum function
- plot = ro.r["plot"] # Map the plot function
- postscript = ro.r["postscript"] # Map the postscript function
- png = ro.r["png"] # Map the png function
- dev_off = ro.r["dev.off"] # Map the device off function
-
- print(r_library("qtl")) # Load R/qtl
-
- ## Get pointers to some R/qtl functions
- scanone = ro.r["scanone"] # Map the scanone function
- scantwo = ro.r["scantwo"] # Map the scantwo function
- calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function
- read_cross = ro.r["read.cross"] # Map the read.cross function
- write_cross = ro.r["write.cross"] # Map the write.cross function
- GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function
-
- crossname = dataset.group.name
- genofilelocation = locate(crossname + ".geno", "genotype")
- crossfilelocation = TMPDIR + crossname + ".cross"
-
- #print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation)
-
- cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available
-
- if manhattan_plot:
- cross_object = calc_genoprob(cross_object)
- else:
- cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")
-
- cross_object = add_phenotype(cross_object, sanitize_rqtl_phenotype(vals)) # Add the phenotype
-
- # for debug: write_cross(cross_object, "csvr", "test.csvr")
-
- # Scan for QTLs
- covar = create_covariates(control_marker, cross_object) # Create the additive covariate matrix
-
- if pair_scan:
- if do_control == "true": # If sum(covar) > 0 we have a covariate matrix
- print("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method, n_cluster = 16)
- else:
- print("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16)
-
- #print("Pair scan results:", result_data_frame)
-
- pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
- png(file=TEMPDIR+pair_scan_filename)
- plot(result_data_frame)
- dev_off()
-
- return process_pair_scan_results(result_data_frame)
- else:
- if do_control == "true":
- print("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method)
- else:
- print("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method)
-
- if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user)
- if do_control == "true":
- perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm = num_perm, model=model, method=method)
- else:
- perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method)
-
- perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface
- return perm_output, suggestive, significant, process_rqtl_results(result_data_frame)
- else:
- return process_rqtl_results(result_data_frame)
-
-def geno_to_rqtl_function(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
-
- ro.r("""
- trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
-
- getGenoCode <- function(header, name = 'unk'){
- mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
- return(trim(strsplit(header[mat],':')[[1]][2]))
- }
-
- GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
- header = readLines(genotypes, 40) # Assume a geno header is not longer than 40 lines
- toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers
-
- genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes
- type <- getGenoCode(header, 'type')
- genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
- cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
- if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one
- if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males
- outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype
- c('sex', '', '', sex), # Sex phenotype for the mice
- cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes
- write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file
- require(qtl)
- cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross
- if(type == 'riset') cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl
- return(cross)
- }
- """ % (dataset.group.name + ".geno"))
-
-def add_phenotype(cross, pheno_as_string):
- ro.globalenv["the_cross"] = cross
- ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')')
- return ro.r["the_cross"]
-
-def create_covariates(control_marker, cross):
- ro.globalenv["the_cross"] = cross
- ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix
- userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user
- covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS)
- #print("Marker names of selected covariates:", covariate_names)
- ro.r('covnames <- c(' + covariate_names + ')')
- ro.r('covInGeno <- which(covnames %in% colnames(genotypes))')
- ro.r('covnames <- covnames[covInGeno]')
- ro.r("cat('covnames (purged): ', covnames,'\n')")
- ro.r('covariates <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file
- #print("R/qtl matrix of covariates:", ro.r["covariates"])
- return ro.r["covariates"]
-
-def sanitize_rqtl_phenotype(vals):
- pheno_as_string = "c("
- for i, val in enumerate(vals):
- if val == "x":
- if i < (len(vals) - 1):
- pheno_as_string += "NA,"
- else:
- pheno_as_string += "NA"
- else:
- if i < (len(vals) - 1):
- pheno_as_string += str(val) + ","
- else:
- pheno_as_string += str(val)
- pheno_as_string += ")"
- return pheno_as_string
-
-def process_pair_scan_results(result):
- pair_scan_results = []
-
- result = result[1]
- output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
- #print("R/qtl scantwo output:", output)
-
- for i, line in enumerate(result.iter_row()):
- marker = {}
- marker['name'] = result.rownames[i]
- marker['chr1'] = output[i][0]
- marker['Mb'] = output[i][1]
- marker['chr2'] = int(output[i][2])
- pair_scan_results.append(marker)
-
- #print("pair_scan_results:", pair_scan_results)
-
- return pair_scan_results
-
-def process_rqtl_perm_results(num_perm, results):
- perm_vals = []
- for line in str(results).split("\n")[1:(num_perm+1)]:
- #print("R/qtl permutation line:", line.split())
- perm_vals.append(float(line.split()[1]))
-
- perm_output = perm_vals
- suggestive = np.percentile(np.array(perm_vals), 67)
- significant = np.percentile(np.array(perm_vals), 95)
- print("SIGNIFICANT:", significant)
-
- return perm_output, suggestive, significant
-
-def process_rqtl_results(result): # TODO: how to make this a one liner and not copy the stuff in a loop
- qtl_results = []
-
- output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
- #print("R/qtl scanone output:", output)
-
- for i, line in enumerate(result.iter_row()):
- marker = {}
- marker['name'] = result.rownames[i]
- marker['chr'] = output[i][0]
- marker['Mb'] = output[i][1]
- marker['lod_score'] = output[i][2]
- qtl_results.append(marker)
-
- return qtl_results
\ No newline at end of file +import rpy2.robjects as ro +import numpy as np + +from base.webqtlConfig import TMPDIR +from utility import webqtlUtil +from utility.tools import locate, TEMPDIR + +def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan): + geno_to_rqtl_function(dataset) + + ## Get pointers to some common R functions + r_library = ro.r["library"] # Map the library function + r_c = ro.r["c"] # Map the c function + r_sum = ro.r["sum"] # Map the sum function + plot = ro.r["plot"] # Map the plot function + postscript = ro.r["postscript"] # Map the postscript function + png = ro.r["png"] # Map the png function + dev_off = ro.r["dev.off"] # Map the device off function + + print(r_library("qtl")) # Load R/qtl + + ## Get pointers to some R/qtl functions + scanone = ro.r["scanone"] # Map the scanone function + scantwo = ro.r["scantwo"] # Map the scantwo function + calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function + read_cross = ro.r["read.cross"] # Map the read.cross function + write_cross = ro.r["write.cross"] # Map the write.cross function + GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function + + crossname = dataset.group.name + genofilelocation = locate(crossname + ".geno", "genotype") + crossfilelocation = TMPDIR + crossname + ".cross" + + #print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation) + + cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available + + if manhattan_plot: + cross_object = calc_genoprob(cross_object) + else: + cross_object = calc_genoprob(cross_object, step=1, stepwidth="max") + + cross_object = add_phenotype(cross_object, sanitize_rqtl_phenotype(vals)) # Add the phenotype + + # for debug: write_cross(cross_object, "csvr", "test.csvr") + + # Scan for QTLs + covar = create_covariates(control_marker, cross_object) # Create the additive covariate matrix + + if pair_scan: + if do_control == "true": # If sum(covar) > 0 we have a covariate matrix + print("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method, n_cluster = 16) + else: + print("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16) + + #print("Pair scan results:", result_data_frame) + + pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png" + png(file=TEMPDIR+pair_scan_filename) + plot(result_data_frame) + dev_off() + + return process_pair_scan_results(result_data_frame) + else: + if do_control == "true": + print("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method) + else: + print("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method) + + if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user) + if do_control == "true": + perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm = num_perm, model=model, method=method) + else: + perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method) + + perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface + return perm_output, suggestive, significant, process_rqtl_results(result_data_frame) + else: + 
return process_rqtl_results(result_data_frame) + +def geno_to_rqtl_function(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly + + ro.r(""" + trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } + + getGenoCode <- function(header, name = 'unk'){ + mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1) + return(trim(strsplit(header[mat],':')[[1]][2])) + } + + GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){ + header = readLines(genotypes, 40) # Assume a geno header is not longer than 40 lines + toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers + + genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes + type <- getGenoCode(header, 'type') + genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#') + cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n') + if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one + if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males + outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype + c('sex', '', '', sex), # Sex phenotype for the mice + cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes + write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file + require(qtl) + cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross + if(type == 'riset') cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl + return(cross) + } + """ % (dataset.group.name + ".geno")) + +def add_phenotype(cross, pheno_as_string): + ro.globalenv["the_cross"] = cross + ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')') + return ro.r["the_cross"] + +def create_covariates(control_marker, cross): + ro.globalenv["the_cross"] = cross + ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix + userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user + covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS) + #print("Marker names of selected covariates:", covariate_names) + ro.r('covnames <- c(' + covariate_names + ')') + ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') + ro.r('covnames <- covnames[covInGeno]') + ro.r("cat('covnames (purged): ', covnames,'\n')") + ro.r('covariates <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file + #print("R/qtl matrix of covariates:", ro.r["covariates"]) + return ro.r["covariates"] + +def sanitize_rqtl_phenotype(vals): + pheno_as_string = "c(" + for i, val in enumerate(vals): + if val == "x": + if i < (len(vals) - 1): + pheno_as_string += "NA," + else: + pheno_as_string += "NA" + else: + if i < (len(vals) - 1): + pheno_as_string += str(val) + "," + else: + pheno_as_string += str(val) + pheno_as_string += ")" + return pheno_as_string + +def process_pair_scan_results(result): + pair_scan_results = [] + + result = result[1] + output = [tuple([result[j][i] for j in range(result.ncol)]) for i in 
range(result.nrow)] + #print("R/qtl scantwo output:", output) + + for i, line in enumerate(result.iter_row()): + marker = {} + marker['name'] = result.rownames[i] + marker['chr1'] = output[i][0] + marker['Mb'] = output[i][1] + marker['chr2'] = int(output[i][2]) + pair_scan_results.append(marker) + + return pair_scan_results + +def process_rqtl_perm_results(num_perm, results): + perm_vals = [] + for line in str(results).split("\n")[1:(num_perm+1)]: + #print("R/qtl permutation line:", line.split()) + perm_vals.append(float(line.split()[1])) + + perm_output = perm_vals + suggestive = np.percentile(np.array(perm_vals), 67) + significant = np.percentile(np.array(perm_vals), 95) + + return perm_output, suggestive, significant + +def process_rqtl_results(result): # TODO: how to make this a one liner and not copy the stuff in a loop + qtl_results = [] + + output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] + #print("R/qtl scanone output:", output) + + for i, line in enumerate(result.iter_row()): + marker = {} + marker['name'] = result.rownames[i] + marker['chr'] = output[i][0] + marker['Mb'] = output[i][1] + marker['lod_score'] = output[i][2] + qtl_results.append(marker) + + return qtl_results + diff --git a/wqflask/wqflask/templates/base.html b/wqflask/wqflask/templates/base.html index c3826a90..210c5708 100644 --- a/wqflask/wqflask/templates/base.html +++ b/wqflask/wqflask/templates/base.html @@ -95,29 +95,27 @@ </form> </div> - {% block content %}{% endblock %} + {% block content %} + {% endblock %} - <!-- Footer - ================================================== --> <footer class="footer"> <div class="container"> - - <p> - -GeneNetwork is a framework for web based genetics - launched in 1994 as + <p>GeneNetwork is a framework for web based genetics + launched in 1994 as <a href="http://www.ncbi.nlm.nih.gov/pubmed?term=8043953"> The Portable Dictionary of the Mouse Genome</a> (previously <a href="https://www.ncbi.nlm.nih.gov/pubmed/15043217">WebQTL</a>). </p> - <p>Operated by + <p> + Operated by <a href="mailto:rwilliams@uthsc.edu">Rob Williams</a>, <a href="mailto:lyan6@uthsc.edu">Lei Yan</a>, - <a href="mailto:zachary.a.sloan@gmail.com">Zachary Sloan</a>, and - <a href="mailto:acenteno@uthsc.edu">Arthur Centeno</a>. + <a href="mailto:zachary.a.sloan@gmail.com">Zachary Sloan</a>, + <a href="mailto:acenteno@uthsc.edu">Arthur Centeno</a> and + <a href="http://thebird.nl/">Pjotr Prins</a>. </p> <p>Published in <a href="http://joss.theoj.org/papers/10.21105/joss.00025"><img src="https://camo.githubusercontent.com/846b750f582ae8f1d0b4f7e8fee78bed705c88ba/687474703a2f2f6a6f73732e7468656f6a2e6f72672f7061706572732f31302e32313130352f6a6f73732e30303032352f7374617475732e737667" alt="JOSS" data-canonical-src="http://joss.theoj.org/papers/10.21105/joss.00025/status.svg" style="max-width:100%;"></a> - </p> + </p> <br /> <p>GeneNetwork is supported by:</p> <UL> @@ -143,8 +141,12 @@ GeneNetwork is a framework for web based genetics </li> </UL> <!--</p>--> + <p> + Development and source code on <a href="https://github.com/genenetwork/">github</a> with <a href="https://github.com/genenetwork/genenetwork2/issues">issue tracker</a> and <a href="https://github.com/genenetwork/genenetwork2/blob/master/README.md">documentation</a>. Join the <a href="http://listserv.uthsc.edu/mailman/listinfo/genenetwork-dev">mailing list</a> and find us on <a href="https://webchat.freenode.net/">IRC</a> (#genenetwork channel). 
+ {% if version: %} + <p><small>GeneNetwork v{{ version }}</small></p> + {% endif %} - Join the <a href="http://listserv.uthsc.edu/mailman/listinfo/genenetwork-dev">mailing list</a> </div> </footer> diff --git a/wqflask/wqflask/templates/ctl_results.html b/wqflask/wqflask/templates/ctl_results.html index a5cb1c08..00ccecb6 100644 --- a/wqflask/wqflask/templates/ctl_results.html +++ b/wqflask/wqflask/templates/ctl_results.html @@ -4,7 +4,7 @@ {% block content %} <!-- Start of body --> <div class="container"> <h1>CTL Results</h1> - {{(request.form['trait_list'].split(',')|length -1)}} phenotypes as input<br> + {{(request.form['trait_list'].split(',')|length)}} phenotypes as input<br> <h3>Network Figure</h3> <a href="/tmp/{{ results['imgurl1'] }}"> <img alt="Embedded Image" src="data:image/png;base64, diff --git a/wqflask/wqflask/templates/ctl_setup.html b/wqflask/wqflask/templates/ctl_setup.html index 51553322..a05379a8 100644 --- a/wqflask/wqflask/templates/ctl_setup.html +++ b/wqflask/wqflask/templates/ctl_setup.html @@ -12,7 +12,7 @@ </div> {% else %} <h1>CTL analysis parameters</h1> - {{(request.form['trait_list'].split(',')|length -1)}} traits as input + {{(request.form['trait_list'].split(',')|length)}} traits as input <form action="/ctl_results" method="post" class="form-horizontal"> <input type="hidden" name="trait_list" id="trait_list" value= "{{request.form['trait_list']}}"> diff --git a/wqflask/wqflask/templates/error.html b/wqflask/wqflask/templates/error.html index 7ab2bf2f..c707a4fc 100644 --- a/wqflask/wqflask/templates/error.html +++ b/wqflask/wqflask/templates/error.html @@ -35,7 +35,7 @@ </p> <pre> - {{ stack[0] }} + GeneNetwork v{{ version }} {{ stack[0] }} {{ message }} (error) {{ stack[-3] }} {{ stack[-2] }} @@ -50,7 +50,7 @@ <a href="#Stack" class="btn btn-default" data-toggle="collapse">Toggle full stack trace</a> <div id="Stack" class="collapse"> <pre> - {% for line in stack %} {{ line }} + GeneNetwork v{{ version }} {% for line in stack %} {{ line }} {% endfor %} </pre> </div> diff --git a/wqflask/wqflask/templates/index_page_orig.html b/wqflask/wqflask/templates/index_page_orig.html index 73d3e718..1694eae5 100755 --- a/wqflask/wqflask/templates/index_page_orig.html +++ b/wqflask/wqflask/templates/index_page_orig.html @@ -1,5 +1,10 @@ {% extends "base.html" %} {% block title %}GeneNetwork{% endblock %} +{% block css %} +<style TYPE="text/css"> + p.interact { display: none; } +</style> +{% endblock %} {% block content %} <!-- Start of body --> @@ -166,8 +171,17 @@ </ul> </section> </div> + <div style="padding-left:120px" class="col-xs-4" style="width: 600px !important;"> - <!-- + <section id="news-section"> + <div class="page-header"> + <h1>News</h1> + </div> + <div id="tweets"></div> + <div align="right"> + <a href="https://twitter.com/GeneNetwork2">more news items...</a> + </div> + <!-- <section id="tour-info"> <div class="page-header"> <h1>Tour and more info</h1> @@ -194,34 +208,37 @@ </section> --> + </section> <section id="websites"> <div class="page-header"> - <h1>Affiliates and mirrors</h1> + <h2>Links</h2> </div> - <h3>Websites affiliated with GeneNetwork</h3> + <h3>GeneNetwork v2:</h3> + <ul> + <li><a href="http://gn2.genenetwork.org/">Main website</a> at UTHSC</li> + <li><a href="http://test-gn2.genenetwork.org/">Testing website</a> at UTHSC</li> + </ul> + <h3>GeneNetwork v1:</h3> <ul> + <li><a href="http://www.genenetwork.org/">Main website</a> at UTHSC</li> + <li><a href="http://genenetwork.helmholtz-hzi.de/">Website</a> at the HZI (Germany)</li> + <li><a 
href="http://ec2.genenetwork.org/">Amazon + Cloud (EC2)</a></li> + </ul> + <h3>Affiliates</h3> + <ul> <li><a href="http://ucscbrowser.genenetwork.org/">Genome browser</a> at UTHSC</li> <li><a href="http://galaxy.genenetwork.org/">Galaxy</a> at UTHSC</li> - <li>GeneNetwork 1 at <a href="http://ec2.genenetwork.org/">Amazon - Cloud (EC2)</a></li> + </ul> - <li>GeneNetwork 1 Source Code at <a href="http://sourceforge.net/projects/genenetwork/">SourceForge</a></li> - <li>GeneNetwork 2 Source Code at <a href="https://github.com/genenetwork/genenetwork2">GitHub</a></li> - </ul> - <h3>GN1 Mirror and development sites</h3> - <ul> - <li><a href="http://www.genenetwork.org/">Main GN1 site at UTHSC</a> (main site)</li> - <li><a href="http://genenetwork.helmholtz-hzi.de/">Germany at the HZI</a></li> - <li><a href="http://gn2.genenetwork.org/">Memphis at the U of M</a></li> - </ul> - </section> + </section> <!--<section id="getting-started"> <div class="page-header"> @@ -282,4 +299,21 @@ } </script> + <script type="text/javascript" src="/twitter/js/twitterFetcher_min.js"></script> + + <script type="text/javascript"> + var configProfile = { + "profile": {"screenName": 'GeneNetwork2'}, + "domId": 'tweets', + "maxTweets": 5, + "enableLinks": true, + "showUser": false, + "showTime": true, + "showImages": false, + "lang": 'en' + }; + twitterFetcher.fetch(configProfile); + </script> + + {% endblock %} diff --git a/wqflask/wqflask/user_manager.py b/wqflask/wqflask/user_manager.py index 598af0a6..1e831896 100644 --- a/wqflask/wqflask/user_manager.py +++ b/wqflask/wqflask/user_manager.py @@ -158,7 +158,6 @@ def verify_cookie(cookie): assert the_signature == actual_hmac_creation(the_uuid), "Uh-oh, someone tampering with the cookie?" return the_uuid - def create_signed_cookie(): the_uuid = str(uuid.uuid4()) signature = actual_hmac_creation(the_uuid) diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 406f8930..822bab9f 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -51,7 +51,7 @@ from wqflask.wgcna import wgcna_analysis from wqflask.ctl import ctl_analysis from utility import temp_data -from utility.tools import SQL_URI,TEMPDIR,USE_REDIS,USE_GN_SERVER,GN_SERVER_URL +from utility.tools import SQL_URI,TEMPDIR,USE_REDIS,USE_GN_SERVER,GN_SERVER_URL,GN_VERSION from base import webqtlFormData from base.webqtlConfig import GENERATED_IMAGE_DIR @@ -108,7 +108,7 @@ def handle_bad_request(e): list = [fn for fn in os.listdir("./wqflask/static/gif/error") if fn.endswith(".gif") ] animation = random.choice(list) - resp = make_response(render_template("error.html",message=err_msg,stack=formatted_lines,error_image=animation)) + resp = make_response(render_template("error.html",message=err_msg,stack=formatted_lines,error_image=animation,version=GN_VERSION)) # logger.error("Set cookie %s with %s" % (err_msg, animation)) resp.set_cookie(err_msg[:32],animation) @@ -124,10 +124,10 @@ def index_page(): g.cookie_session.import_traits_to_user() if USE_GN_SERVER: # The menu is generated using GN_SERVER - return render_template("index_page.html", gn_server_url = GN_SERVER_URL) + return render_template("index_page.html", gn_server_url = GN_SERVER_URL, version=GN_VERSION) else: # Old style static menu (OBSOLETE) - return render_template("index_page_orig.html") + return render_template("index_page_orig.html", version=GN_VERSION) @app.route("/tmp/<img_path>") @@ -143,6 +143,10 @@ def tmp_page(img_path): return render_template("show_image.html", img_base64 = bytesarray ) 
+@app.route("/twitter/<path:filename>") +def bd_files(filename): + bd_path = app.config['TWITTER_POST_FETCHER_JS_PATH'] + return send_from_directory(bd_path, filename) #@app.route("/data_sharing") #def data_sharing_page(): |