aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbin/genenetwork214
-rwxr-xr-xbin/test-website12
-rw-r--r--etc/default_settings.py5
-rw-r--r--test/lib/gntest.rb1
-rw-r--r--test/lib/mapping.rb1
-rw-r--r--wqflask/base/webqtlConfig.py22
-rw-r--r--wqflask/utility/tools.py15
-rw-r--r--wqflask/wqflask/collect.py60
-rw-r--r--wqflask/wqflask/marker_regression/marker_regression.py343
-rw-r--r--wqflask/wqflask/marker_regression/plink_mapping.py161
-rw-r--r--wqflask/wqflask/marker_regression/qtlreaper_mapping.py91
-rw-r--r--wqflask/wqflask/marker_regression/rqtl_mapping.py384
-rw-r--r--wqflask/wqflask/templates/base.html28
-rw-r--r--wqflask/wqflask/templates/ctl_results.html2
-rw-r--r--wqflask/wqflask/templates/ctl_setup.html2
-rw-r--r--wqflask/wqflask/templates/error.html4
-rwxr-xr-xwqflask/wqflask/templates/index_page_orig.html64
-rw-r--r--wqflask/wqflask/user_manager.py1
-rw-r--r--wqflask/wqflask/views.py12
19 files changed, 616 insertions, 606 deletions
diff --git a/bin/genenetwork2 b/bin/genenetwork2
index d7d1c325..52d3155c 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -26,6 +26,10 @@ GN2_BASE_DIR=$(dirname $(dirname "$SCRIPT"))
echo GN2_BASE_DIR=$GN2_BASE_DIR
+GIT_HASH=$(git rev-parse HEAD)
+GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
+export GN_VERSION=$(cat $GN2_BASE_DIR/VERSION)-$GIT_BRANCH-${GIT_HASH:0:9}
+echo GN_VERSION=$GN_VERSION
# Handle settings parameter (can be .py or .json)
settings=$1
@@ -79,13 +83,13 @@ fi
# We may change this one:
export PYTHONPATH=$GN2_BASE_DIR/wqflask:$PYTHONPATH
-# TEMPDIR defaults to /tmp if nothing else
-if [ -z $TEMPDIR ]; then
- TEMPDIR="/tmp"
+# Our UNIX TMPDIR defaults to /tmp - change this on a shared server
+if [ -z $TMPDIR ]; then
+ TMPDIR="/tmp"
fi
set|grep $GN2_PROFILE
-set|grep TEMPDIR
+set|grep TMPDIR
# Now handle command parameter -c
if [ "$1" = '-c' ] ; then
@@ -98,7 +102,7 @@ if [ "$1" = '-c' ] ; then
fi
echo "Starting the redis server:"
-echo -n "dir $TEMPDIR
+echo -n "dir $TMPDIR
dbfilename gn2.rdb
" | redis-server - &
diff --git a/bin/test-website b/bin/test-website
index c9a72a5e..be223d94 100755
--- a/bin/test-website
+++ b/bin/test-website
@@ -52,12 +52,12 @@ opts = OptionParser.new do |o|
options[:link_checker] = true
end
- o.on('--navigation-test', 'Check for navigation') do
- options[:navigation_test] = true
+ o.on('--navigation', 'Check for navigation') do
+ options[:navigation] = true
end
- o.on('--mapping-test', 'Check for mapping') do
- options[:mapping_test] = true
+ o.on('--mapping', 'Check for mapping') do
+ options[:mapping] = true
end
o.on('--skip-broken', 'Skip tests that are known to be broken') do
@@ -98,7 +98,7 @@ $: << File.join(libpath,'test/lib')
require 'main_web_functionality'
-if options[:all] or options[:mapping_test]
+if options[:all] or options[:mapping]
require 'mapping'
end
@@ -106,6 +106,6 @@ if options[:all] or options[:link_checker]
require 'link_checker'
end
-if options[:all] or options[:navigation_test]
+if options[:all] or options[:navigation]
require 'navigation'
end
diff --git a/etc/default_settings.py b/etc/default_settings.py
index d8e57f38..6acea637 100644
--- a/etc/default_settings.py
+++ b/etc/default_settings.py
@@ -18,6 +18,7 @@
import os
import sys
+GN_VERSION = open("../VERSION","r").read()
SQL_URI = "mysql://gn2:mysql_password@localhost/db_webqtl_s"
SQLALCHEMY_DATABASE_URI = 'mysql://gn2:mysql_password@localhost/db_webqtl_s'
SQLALCHEMY_POOL_RECYCLE = 3600
@@ -50,6 +51,10 @@ LOG_BENCH = True # Log bench marks
USE_REDIS = True # REDIS caching (note that redis will be phased out)
USE_GN_SERVER = 'False' # Use GN_SERVER SQL calls
+# Paths to JS libraries
+
+TWITTER_POST_FETCHER_JS_PATH = os.environ['HOME']+"/genenetwork/Twitter-Post-Fetcher"
+
# ---- Path overrides for Genenetwork
# TMPDIR is normally picked up from the environment
HOME=os.environ['HOME']
diff --git a/test/lib/gntest.rb b/test/lib/gntest.rb
index 865ef51d..5b21b3d5 100644
--- a/test/lib/gntest.rb
+++ b/test/lib/gntest.rb
@@ -1,5 +1,6 @@
require 'minitest/autorun'
require 'mechanize'
+require 'json'
# ---- Use some default parameters if not set
$host = "http://localhost:5003" if !$host
diff --git a/test/lib/mapping.rb b/test/lib/mapping.rb
index d6a3cd7b..20e5bd40 100644
--- a/test/lib/mapping.rb
+++ b/test/lib/mapping.rb
@@ -36,6 +36,7 @@ describe MappingTest do
json,
({'Content-Type' => 'application/x-www-form-urlencoded'}))
form = page.forms_with("marker_regression")[0]
+ p form
form.fields.select { |fld| fld.name == 'dataset' }.first.value.must_equal 'HC_M2_0606_P'
form.fields.select { |fld| fld.name == 'value:BXD1' }.first.value.must_equal "15.034"
end
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index 6bbabdec..e5f10edf 100644
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -8,7 +8,7 @@
#
#########################################
-from utility.tools import valid_path, mk_dir, assert_dir, flat_files, TEMPDIR
+from utility.tools import valid_path, mk_dir, assert_dir, assert_writable_dir, flat_files, TEMPDIR
#Debug Level
#1 for debug, mod python will reload import each time
@@ -60,24 +60,36 @@ ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Lucene/Details?sp
# HTMLPATH is replaced by GENODIR
# IMGDIR is replaced by GENERATED_IMAGE_DIR
-# Temporary storage (note that this TMPDIR is not the same directory
-# as the UNIX TMPDIR)
+# Temporary storage (note that this TMPDIR can be set as an
+# environment variable - use utility.tools.TEMPDIR when you
+# want to reach this base dir
+assert_writable_dir(TEMPDIR)
+
TMPDIR = mk_dir(TEMPDIR+'/gn2/')
+assert_writable_dir(TMPDIR)
+
CACHEDIR = mk_dir(TMPDIR+'/cache/')
# We can no longer write into the git tree:
GENERATED_IMAGE_DIR = mk_dir(TMPDIR+'/generated/')
GENERATED_TEXT_DIR = mk_dir(TMPDIR+'/generated_text/')
+# Make sure we have permissions to access these
+assert_writable_dir(CACHEDIR)
+assert_writable_dir(GENERATED_IMAGE_DIR)
+assert_writable_dir(GENERATED_TEXT_DIR)
+
# Flat file directories
GENODIR = flat_files('genotype')+'/'
+assert_dir(GENODIR)
+
+# JSON genotypes are OBSOLETE
JSON_GENODIR = flat_files('genotype/json')+'/'
if not valid_path(JSON_GENODIR):
# fall back on old location (move the dir, FIXME)
JSON_GENODIR = flat_files('json')
-assert_dir(GENODIR)
+# Are we using the following...?
PORTADDR = "http://50.16.251.170"
-
INFOPAGEHREF = '/dbdoc/%s.html'
CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
SCRIPTFILE = 'main.py'
diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py
index 23d6fb62..df032e48 100644
--- a/wqflask/utility/tools.py
+++ b/wqflask/utility/tools.py
@@ -113,6 +113,16 @@ def assert_dir(dir):
raise Exception("ERROR: can not find directory "+dir)
return dir
+def assert_writable_dir(dir):
+ try:
+ fn = dir + "/test.txt"
+ fh = open( fn, 'w' )
+ fh.write("I am writing this text to the file\n")
+ fh.close()
+ os.remove(fn)
+ except IOError:
+ raise Exception('Unable to write test.txt to directory ' + dir )
+ return dir
def mk_dir(dir):
if not valid_path(dir):
@@ -187,6 +197,7 @@ def show_settings():
# Cached values
+GN_VERSION = get_setting('GN_VERSION')
HOME = get_setting('HOME')
WEBSERVER_MODE = get_setting('WEBSERVER_MODE')
GN_SERVER_URL = get_setting('GN_SERVER_URL')
@@ -205,7 +216,7 @@ GENENETWORK_FILES = get_setting('GENENETWORK_FILES')
PYLMM_COMMAND = pylmm_command()
GEMMA_COMMAND = gemma_command()
PLINK_COMMAND = plink_command()
-TEMPDIR = tempdir()
+TEMPDIR = tempdir() # defaults to UNIX TMPDIR
from six import string_types
@@ -221,3 +232,5 @@ if os.environ.get('WQFLASK_OVERRIDES'):
else:
OVERRIDES[k] = cmd
logger.debug(OVERRIDES)
+
+assert_dir(get_setting("TWITTER_POST_FETCHER_JS_PATH"))
diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py
index 81d03d6c..70ae2a1c 100644
--- a/wqflask/wqflask/collect.py
+++ b/wqflask/wqflask/collect.py
@@ -55,7 +55,7 @@ class AnonCollection(object):
#ZS: Find id and set it if the collection doesn't already exist
if Redis.get(self.key) == "None" or Redis.get(self.key) == None:
- Redis.set(self.key, None) #ZS: For some reason I get the error "Operation against a key holding the wrong kind of value" if I don't do this
+ Redis.set(self.key, None) #ZS: For some reason I get the error "Operation against a key holding the wrong kind of value" if I don't do this
else:
collections_list = json.loads(Redis.get(self.key))
collection_position = 0 #ZS: Position of collection in collection_list, if it exists
@@ -66,9 +66,10 @@ class AnonCollection(object):
collection_exists = True
self.id = collection['id']
break
+
if self.id == None:
self.id = str(uuid.uuid4())
-
+
def get_members(self):
traits = []
collections_list = json.loads(Redis.get(self.key))
@@ -76,16 +77,16 @@ class AnonCollection(object):
if collection['id'] == self.id:
traits = collection['members']
return traits
-
+
@property
def num_members(self):
- num_members = 0
+ num_members = 0
collections_list = json.loads(Redis.get(self.key))
for collection in collections_list:
if collection['id'] == self.id:
num_members = collection['num_members']
return num_members
-
+
def add_traits(self, params):
#assert collection_name == "Default", "Unexpected collection name for anonymous user"
self.traits = list(process_traits(params['traits']))
@@ -122,7 +123,7 @@ class AnonCollection(object):
"num_members" : len(self.traits),
"members" : self.traits}
collections_list.append(collection_dict)
-
+
Redis.set(self.key, json.dumps(collections_list))
#Redis.sadd(self.key, *list(traits))
#Redis.expire(self.key, 60 * 60 * 24 * 5)
@@ -169,14 +170,14 @@ class UserCollection(object):
len_before = len(members)
traits = process_traits(params['traits'])
-
+
members_now = members
for trait in traits:
if trait in members:
continue
else:
members_now.append(trait)
-
+
#members_now = list(members | traits)
len_now = len(members_now)
uc.members = json.dumps(members_now)
@@ -191,6 +192,18 @@ class UserCollection(object):
# Probably have to change that
return redirect(url_for('view_collection', uc_id=uc.id))
+def process_traits(unprocessed_traits):
+ #print("unprocessed_traits are:", unprocessed_traits)
+ if isinstance(unprocessed_traits, basestring):
+ unprocessed_traits = unprocessed_traits.split(",")
+ traits = set()
+ for trait in unprocessed_traits:
+ #print("trait is:", trait)
+ data, _separator, hmac = trait.rpartition(':')
+ data = data.strip()
+ assert hmac==user_manager.actual_hmac_creation(data), "Data tampering?"
+ traits.add (str(data))
+ return traits
def report_change(len_before, len_now):
new_length = len_now - len_before
@@ -224,11 +237,10 @@ def collections_add():
collections = collection_names,
)
-
@app.route("/collections/new")
def collections_new():
params = request.args
-
+
if "sign_in" in params:
return redirect(url_for('login'))
@@ -248,26 +260,12 @@ def collections_new():
else:
CauseAnError
-
-def process_traits(unprocessed_traits):
- #print("unprocessed_traits are:", unprocessed_traits)
- if isinstance(unprocessed_traits, basestring):
- unprocessed_traits = unprocessed_traits.split(",")
- traits = set()
- for trait in unprocessed_traits:
- #print("trait is:", trait)
- data, _separator, hmac = trait.rpartition(':')
- data = data.strip()
- assert hmac==user_manager.actual_hmac_creation(data), "Data tampering?"
- traits.add (str(data))
- return traits
-
def create_new(collection_name):
params = request.args
-
+
unprocessed_traits = params['traits']
traits = process_traits(unprocessed_traits)
-
+
if g.user_session.logged_in:
uc = model.UserCollection()
uc.name = collection_name
@@ -280,7 +278,7 @@ def create_new(collection_name):
else:
current_collections = user_manager.AnonUser().get_collections()
ac = AnonCollection(collection_name)
- ac.changed_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p')
+ ac.changed_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p')
ac.add_traits(params)
return redirect(url_for('view_collection', collection_id=ac.id))
@@ -345,7 +343,7 @@ def delete_collection():
else:
collection_name = params['collection_name']
user_manager.AnonUser().delete_collection(collection_name)
-
+
flash("We've deleted the collection: {}.".format(collection_name), "alert-info")
return redirect(url_for('list_collections'))
@@ -369,15 +367,15 @@ def view_collection():
break
#this_collection = user_collections[params['collection_id']]
traits = this_collection['members']
-
+
print("in view_collection traits are:", traits)
trait_obs = []
json_version = []
for atrait in traits:
- name, dataset_name = atrait.split(':')
-
+ name, dataset_name = atrait.split(':')
+
trait_ob = trait.GeneralTrait(name=name, dataset_name=dataset_name)
trait_ob.retrieve_info(get_qtl_info=True)
trait_obs.append(trait_ob)
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 543eeede..4d622f21 100644
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -35,7 +35,7 @@ from utility import helper_functions
from utility import Plot, Bunch
from utility import temp_data
from utility.benchmark import Bench
-from wqflask.marker_regression import gemma_mapping, rqtl_mapping
+from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping
from utility.tools import locate, locate_ignore_error, PYLMM_COMMAND, GEMMA_COMMAND, PLINK_COMMAND, TEMPDIR
from utility.external import shell
@@ -171,7 +171,7 @@ class MarkerRegression(object):
if start_vars['pair_scan'] == "true":
self.pair_scan = True
if self.permCheck and self.num_perm > 0:
- perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(self.vals, self.dataset, self.method, self.model, self.permCheck, self.num_perm, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan)
+ self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(self.vals, self.dataset, self.method, self.model, self.permCheck, self.num_perm, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan)
else:
results = rqtl_mapping.run_rqtl_geno(self.vals, self.dataset, self.method, self.model, self.permCheck, self.num_perm, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan)
elif self.mapping_method == "reaper":
@@ -203,9 +203,19 @@ class MarkerRegression(object):
self.do_control = start_vars['do_control']
self.dataset.group.genofile = start_vars['genofile']
logger.info("Running qtlreaper")
- results = self.gen_reaper_results()
+ results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.gen_reaper_results(self.this_trait,
+ self.dataset,
+ self.samples,
+ self.json_data,
+ self.num_perm,
+ self.bootCheck,
+ self.num_bootstrap,
+ self.do_control,
+ self.control_marker,
+ self.manhattan_plot)
elif self.mapping_method == "plink":
- results = self.run_plink()
+ results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf)
+ #results = self.run_plink()
elif self.mapping_method == "pylmm":
logger.debug("RUNNING PYLMM")
self.dataset.group.genofile = start_vars['genofile']
@@ -303,55 +313,12 @@ class MarkerRegression(object):
perm_results = self.perm_output,
)
-
- def run_gemma(self):
- """Generates p-values for each marker using GEMMA"""
-
- self.gen_pheno_txt_file()
-
- #os.chdir("/home/zas1024/gene/web/gemma")
-
- gemma_command = GEMMA_COMMAND + ' -bfile %s -k output_%s.cXX.txt -lmm 1 -o %s_output' % (
- self.dataset.group.name,
- self.dataset.group.name,
- self.dataset.group.name)
- #logger.debug("gemma_command:" + gemma_command)
-
- os.system(gemma_command)
-
- included_markers, p_values = self.parse_gemma_output()
-
- self.dataset.group.get_specified_markers(markers = included_markers)
- self.dataset.group.markers.add_pvalues(p_values)
- return self.dataset.group.markers.markers
-
- def parse_gemma_output(self):
- included_markers = []
- p_values = []
- # Use a temporary file name here!
- with open(webqtlConfig.GENERATED_TEXT_DIR+"/{}_output.assoc.txt".format(self.dataset.group.name)) as output_file:
- for line in output_file:
- if line.startswith("chr"):
- continue
- else:
- included_markers.append(line.split("\t")[1])
- p_values.append(float(line.split("\t")[10]))
- #p_values[line.split("\t")[1]] = float(line.split("\t")[10])
- #logger.debug("p_values: ", p_values)
- return included_markers, p_values
-
- def gen_pheno_txt_file(self):
- """Generates phenotype file for GEMMA"""
- with open(webqtlConfig.GENERATED_TEXT_DIR+"{}.fam".format(self.dataset.group.name), "w") as outfile:
- for i, sample in enumerate(self.samples):
- outfile.write(str(sample) + " " + str(sample) + " 0 0 0 " + str(self.vals[i]) + "\n")
-
def run_rqtl_plink(self):
# os.chdir("") never do this inside a webserver!!
output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name))
- self.gen_pheno_txt_file_plink(pheno_filename = output_filename)
+ plink_mapping.gen_pheno_txt_file_plink(self.this_trait, self.dataset, self.vals, pheno_filename = output_filename)
rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename)
@@ -359,286 +326,6 @@ class MarkerRegression(object):
count, p_values = self.parse_rqtl_output(plink_output_filename)
- def run_plink(self):
- plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name))
-
- self.gen_pheno_txt_file_plink(pheno_filename = plink_output_filename)
-
- plink_command = PLINK_COMMAND + ' --noweb --ped %s/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %s/%s.map --pheno %s%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (PLINK_PATH, self.dataset.group.name, PLINK_PATH, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename)
- logger.debug("plink_command:", plink_command)
-
- os.system(plink_command)
-
- count, p_values = self.parse_plink_output(plink_output_filename)
-
- #for marker in self.dataset.group.markers.markers:
- # if marker['name'] not in included_markers:
- # logger.debug("marker:", marker)
- # self.dataset.group.markers.markers.remove(marker)
- # #del self.dataset.group.markers.markers[marker]
-
- logger.debug("p_values:", pf(p_values))
-
- self.dataset.group.markers.add_pvalues(p_values)
-
- return self.dataset.group.markers.markers
-
-
- def gen_pheno_txt_file_plink(self, pheno_filename = ''):
- ped_sample_list = self.get_samples_from_ped_file()
- output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb")
- header = 'FID\tIID\t%s\n' % self.this_trait.name
- output_file.write(header)
-
- new_value_list = []
-
- #if valueDict does not include some strain, value will be set to -9999 as missing value
- for i, sample in enumerate(ped_sample_list):
- try:
- value = self.vals[i]
- value = str(value).replace('value=','')
- value = value.strip()
- except:
- value = -9999
-
- new_value_list.append(value)
-
-
- new_line = ''
- for i, sample in enumerate(ped_sample_list):
- j = i+1
- value = new_value_list[i]
- new_line += '%s\t%s\t%s\n'%(sample, sample, value)
-
- if j%1000 == 0:
- output_file.write(newLine)
- new_line = ''
-
- if new_line:
- output_file.write(new_line)
-
- output_file.close()
-
- def gen_pheno_txt_file_rqtl(self, pheno_filename = ''):
- ped_sample_list = self.get_samples_from_ped_file()
- output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb")
- header = 'FID\tIID\t%s\n' % self.this_trait.name
- output_file.write(header)
-
- new_value_list = []
-
- #if valueDict does not include some strain, value will be set to -9999 as missing value
- for i, sample in enumerate(ped_sample_list):
- try:
- value = self.vals[i]
- value = str(value).replace('value=','')
- value = value.strip()
- except:
- value = -9999
-
- new_value_list.append(value)
-
-
- new_line = ''
- for i, sample in enumerate(ped_sample_list):
- j = i+1
- value = new_value_list[i]
- new_line += '%s\t%s\t%s\n'%(sample, sample, value)
-
- if j%1000 == 0:
- output_file.write(newLine)
- new_line = ''
-
- if new_line:
- output_file.write(new_line)
-
- output_file.close()
-
- # get strain name from ped file in order
- def get_samples_from_ped_file(self):
- ped_file= open("{}/{}.ped".format(PLINK_PATH, self.dataset.group.name),"r")
- line = ped_file.readline()
- sample_list=[]
-
- while line:
- lineList = string.split(string.strip(line), '\t')
- lineList = map(string.strip, lineList)
-
- sample_name = lineList[0]
- sample_list.append(sample_name)
-
- line = ped_file.readline()
-
- return sample_list
-
- def gen_reaper_results(self):
- genotype = self.dataset.group.read_genotype_file()
-
- if self.manhattan_plot != True:
- genotype = genotype.addinterval()
-
- samples, values, variances, sample_aliases = self.this_trait.export_informative()
-
- trimmed_samples = []
- trimmed_values = []
- for i in range(0, len(samples)):
- #if self.this_trait.data[samples[i]].name2 in self.dataset.group.samplelist:
- if self.this_trait.data[samples[i]].name in self.samples:
- trimmed_samples.append(samples[i])
- trimmed_values.append(values[i])
-
- if self.num_perm < 100:
- self.suggestive = 0
- self.significant = 0
- else:
- self.perm_output = genotype.permutation(strains = trimmed_samples, trait = trimmed_values, nperm=self.num_perm)
- self.suggestive = self.perm_output[int(self.num_perm*0.37-1)]
- self.significant = self.perm_output[int(self.num_perm*0.95-1)]
- self.highly_significant = self.perm_output[int(self.num_perm*0.99-1)]
-
- self.json_data['suggestive'] = self.suggestive
- self.json_data['significant'] = self.significant
-
- if self.control_marker != "" and self.do_control == "true":
- reaper_results = genotype.regression(strains = trimmed_samples,
- trait = trimmed_values,
- control = str(self.control_marker))
- if self.bootCheck:
- control_geno = []
- control_geno2 = []
- _FIND = 0
- for _chr in genotype:
- for _locus in _chr:
- if _locus.name == self.control_marker:
- control_geno2 = _locus.genotype
- _FIND = 1
- break
- if _FIND:
- break
- if control_geno2:
- _prgy = list(genotype.prgy)
- for _strain in trimmed_samples:
- _idx = _prgy.index(_strain)
- control_geno.append(control_geno2[_idx])
-
- self.bootstrap_results = genotype.bootstrap(strains = trimmed_samples,
- trait = trimmed_values,
- control = control_geno,
- nboot = self.num_bootstrap)
- else:
- reaper_results = genotype.regression(strains = trimmed_samples,
- trait = trimmed_values)
-
- if self.bootCheck:
- self.bootstrap_results = genotype.bootstrap(strains = trimmed_samples,
- trait = trimmed_values,
- nboot = self.num_bootstrap)
-
- self.json_data['chr'] = []
- self.json_data['pos'] = []
- self.json_data['lod.hk'] = []
- self.json_data['markernames'] = []
- #if self.additive:
- # self.json_data['additive'] = []
-
- #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
- qtl_results = []
- for qtl in reaper_results:
- reaper_locus = qtl.locus
- #ZS: Convert chr to int
- converted_chr = reaper_locus.chr
- if reaper_locus.chr != "X" and reaper_locus.chr != "X/Y":
- converted_chr = int(reaper_locus.chr)
- self.json_data['chr'].append(converted_chr)
- self.json_data['pos'].append(reaper_locus.Mb)
- self.json_data['lod.hk'].append(qtl.lrs)
- self.json_data['markernames'].append(reaper_locus.name)
- #if self.additive:
- # self.json_data['additive'].append(qtl.additive)
- locus = {"name":reaper_locus.name, "chr":reaper_locus.chr, "cM":reaper_locus.cM, "Mb":reaper_locus.Mb}
- qtl = {"lrs_value": qtl.lrs, "chr":converted_chr, "Mb":reaper_locus.Mb,
- "cM":reaper_locus.cM, "name":reaper_locus.name, "additive":qtl.additive, "dominance":qtl.dominance}
- qtl_results.append(qtl)
-
- return qtl_results
-
-
- def parse_plink_output(self, output_filename):
- plink_results={}
-
- threshold_p_value = 0.01
-
- result_fp = open("%s%s.qassoc"% (TMPDIR, output_filename), "rb")
-
- header_line = result_fp.readline()# read header line
- line = result_fp.readline()
-
- value_list = [] # initialize value list, this list will include snp, bp and pvalue info
- p_value_dict = {}
- count = 0
-
- while line:
- #convert line from str to list
- line_list = self.build_line_list(line=line)
-
- # only keep the records whose chromosome name is in db
- if self.species.chromosomes.chromosomes.has_key(int(line_list[0])) and line_list[-1] and line_list[-1].strip()!='NA':
-
- chr_name = self.species.chromosomes.chromosomes[int(line_list[0])]
- snp = line_list[1]
- BP = line_list[2]
- p_value = float(line_list[-1])
- if threshold_p_value >= 0 and threshold_p_value <= 1:
- if p_value < threshold_p_value:
- p_value_dict[snp] = float(p_value)
-
- if plink_results.has_key(chr_name):
- value_list = plink_results[chr_name]
-
- # pvalue range is [0,1]
- if threshold_p_value >=0 and threshold_p_value <= 1:
- if p_value < threshold_p_value:
- value_list.append((snp, BP, p_value))
- count += 1
-
- plink_results[chr_name] = value_list
- value_list = []
- else:
- if threshold_p_value >= 0 and threshold_p_value <= 1:
- if p_value < threshold_p_value:
- value_list.append((snp, BP, p_value))
- count += 1
-
- if value_list:
- plink_results[chr_name] = value_list
-
- value_list=[]
-
- line = result_fp.readline()
- else:
- line = result_fp.readline()
-
- #if p_value_list:
- # min_p_value = min(p_value_list)
- #else:
- # min_p_value = 0
-
- return count, p_value_dict
-
- ######################################################
- # input: line: str,one line read from file
- # function: convert line from str to list;
- # output: lineList list
- #######################################################
- def build_line_list(self, line=None):
-
- line_list = string.split(string.strip(line),' ')# irregular number of whitespaces between columns
- line_list = [item for item in line_list if item <>'']
- line_list = map(string.strip, line_list)
-
- return line_list
-
-
def run_permutations(self, temp_uuid):
"""Runs permutations and gets significant and suggestive LOD scores"""
diff --git a/wqflask/wqflask/marker_regression/plink_mapping.py b/wqflask/wqflask/marker_regression/plink_mapping.py
new file mode 100644
index 00000000..b457b9a0
--- /dev/null
+++ b/wqflask/wqflask/marker_regression/plink_mapping.py
@@ -0,0 +1,161 @@
+import string
+import os
+
+from base.webqtlConfig import TMPDIR
+from utility import webqtlUtil
+from utility.tools import PLINK_COMMAND
+
+import utility.logger
+logger = utility.logger.getLogger(__name__ )
+
+def run_plink(this_trait, dataset, species, vals, maf):
+ plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(dataset.group.name, this_trait.name))
+
+ gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = plink_output_filename)
+
+ plink_command = PLINK_COMMAND + ' --noweb --ped %s/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %s/%s.map --pheno %s%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (
+ PLINK_PATH, dataset.group.name, PLINK_PATH, dataset.group.name,
+ TMPDIR, plink_output_filename, this_trait.name, maf, TMPDIR,
+ plink_output_filename)
+ logger.debug("plink_command:", plink_command)
+
+ os.system(plink_command)
+
+ count, p_values = parse_plink_output(plink_output_filename, species)
+
+ #for marker in self.dataset.group.markers.markers:
+ # if marker['name'] not in included_markers:
+ # logger.debug("marker:", marker)
+ # self.dataset.group.markers.markers.remove(marker)
+ # #del self.dataset.group.markers.markers[marker]
+
+ logger.debug("p_values:", pf(p_values))
+ dataset.group.markers.add_pvalues(p_values)
+
+ return dataset.group.markers.markers
+
+
+def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''):
+ ped_sample_list = get_samples_from_ped_file(dataset)
+ output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb")
+ header = 'FID\tIID\t%s\n' % this_trait.name
+ output_file.write(header)
+
+ new_value_list = []
+
+ #if valueDict does not include some strain, value will be set to -9999 as missing value
+ for i, sample in enumerate(ped_sample_list):
+ try:
+ value = vals[i]
+ value = str(value).replace('value=','')
+ value = value.strip()
+ except:
+ value = -9999
+
+ new_value_list.append(value)
+
+ new_line = ''
+ for i, sample in enumerate(ped_sample_list):
+ j = i+1
+ value = new_value_list[i]
+ new_line += '%s\t%s\t%s\n'%(sample, sample, value)
+
+ if j%1000 == 0:
+ output_file.write(newLine)
+ new_line = ''
+
+ if new_line:
+ output_file.write(new_line)
+
+ output_file.close()
+
+# get strain name from ped file in order
+def get_samples_from_ped_file(dataset):
+ ped_file= open("{}/{}.ped".format(PLINK_PATH, dataset.group.name),"r")
+ line = ped_file.readline()
+ sample_list=[]
+
+ while line:
+ lineList = string.split(string.strip(line), '\t')
+ lineList = map(string.strip, lineList)
+
+ sample_name = lineList[0]
+ sample_list.append(sample_name)
+
+ line = ped_file.readline()
+
+ return sample_list
+
+def parse_plink_output(output_filename, species):
+ plink_results={}
+
+ threshold_p_value = 0.01
+
+ result_fp = open("%s%s.qassoc"% (TMPDIR, output_filename), "rb")
+
+ header_line = result_fp.readline()# read header line
+ line = result_fp.readline()
+
+ value_list = [] # initialize value list, this list will include snp, bp and pvalue info
+ p_value_dict = {}
+ count = 0
+
+ while line:
+ #convert line from str to list
+ line_list = build_line_list(line=line)
+
+ # only keep the records whose chromosome name is in db
+ if species.chromosomes.chromosomes.has_key(int(line_list[0])) and line_list[-1] and line_list[-1].strip()!='NA':
+
+ chr_name = species.chromosomes.chromosomes[int(line_list[0])]
+ snp = line_list[1]
+ BP = line_list[2]
+ p_value = float(line_list[-1])
+ if threshold_p_value >= 0 and threshold_p_value <= 1:
+ if p_value < threshold_p_value:
+ p_value_dict[snp] = float(p_value)
+
+ if plink_results.has_key(chr_name):
+ value_list = plink_results[chr_name]
+
+ # pvalue range is [0,1]
+ if threshold_p_value >=0 and threshold_p_value <= 1:
+ if p_value < threshold_p_value:
+ value_list.append((snp, BP, p_value))
+ count += 1
+
+ plink_results[chr_name] = value_list
+ value_list = []
+ else:
+ if threshold_p_value >= 0 and threshold_p_value <= 1:
+ if p_value < threshold_p_value:
+ value_list.append((snp, BP, p_value))
+ count += 1
+
+ if value_list:
+ plink_results[chr_name] = value_list
+
+ value_list=[]
+
+ line = result_fp.readline()
+ else:
+ line = result_fp.readline()
+
+ #if p_value_list:
+ # min_p_value = min(p_value_list)
+ #else:
+ # min_p_value = 0
+
+ return count, p_value_dict
+
+######################################################
+# input: line: str,one line read from file
+# function: convert line from str to list;
+# output: lineList list
+#######################################################
+def build_line_list(line=None):
+ line_list = string.split(string.strip(line),' ')# irregular number of whitespaces between columns
+ line_list = [item for item in line_list if item <>'']
+ line_list = map(string.strip, line_list)
+
+ return line_list
diff --git a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py
new file mode 100644
index 00000000..50228b5e
--- /dev/null
+++ b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py
@@ -0,0 +1,91 @@
+def gen_reaper_results(this_trait, dataset, samples_before, json_data, num_perm, bootCheck, num_bootstrap, do_control, control_marker, manhattan_plot):
+ genotype = dataset.group.read_genotype_file()
+
+ if manhattan_plot != True:
+ genotype = genotype.addinterval()
+
+ samples, values, variances, sample_aliases = this_trait.export_informative()
+
+ trimmed_samples = []
+ trimmed_values = []
+ for i in range(0, len(samples)):
+ if this_trait.data[samples[i]].name in samples_before:
+ trimmed_samples.append(samples[i])
+ trimmed_values.append(values[i])
+
+ perm_output = []
+ bootstrap_results = []
+
+ if num_perm < 100:
+ suggestive = 0
+ significant = 0
+ else:
+ perm_output = genotype.permutation(strains = trimmed_samples, trait = trimmed_values, nperm=num_perm)
+ suggestive = perm_output[int(num_perm*0.37-1)]
+ significant = perm_output[int(num_perm*0.95-1)]
+ highly_significant = perm_output[int(num_perm*0.99-1)]
+
+ json_data['suggestive'] = suggestive
+ json_data['significant'] = significant
+
+ if control_marker != "" and do_control == "true":
+ reaper_results = genotype.regression(strains = trimmed_samples,
+ trait = trimmed_values,
+ control = str(control_marker))
+ if bootCheck:
+ control_geno = []
+ control_geno2 = []
+ _FIND = 0
+ for _chr in genotype:
+ for _locus in _chr:
+ if _locus.name == control_marker:
+ control_geno2 = _locus.genotype
+ _FIND = 1
+ break
+ if _FIND:
+ break
+ if control_geno2:
+ _prgy = list(genotype.prgy)
+ for _strain in trimmed_samples:
+ _idx = _prgy.index(_strain)
+ control_geno.append(control_geno2[_idx])
+
+ bootstrap_results = genotype.bootstrap(strains = trimmed_samples,
+ trait = trimmed_values,
+ control = control_geno,
+ nboot = num_bootstrap)
+ else:
+ reaper_results = genotype.regression(strains = trimmed_samples,
+ trait = trimmed_values)
+
+ if bootCheck:
+ bootstrap_results = genotype.bootstrap(strains = trimmed_samples,
+ trait = trimmed_values,
+ nboot = num_bootstrap)
+
+ json_data['chr'] = []
+ json_data['pos'] = []
+ json_data['lod.hk'] = []
+ json_data['markernames'] = []
+ #if self.additive:
+ # self.json_data['additive'] = []
+
+ #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
+ qtl_results = []
+ for qtl in reaper_results:
+ reaper_locus = qtl.locus
+ #ZS: Convert chr to int
+ converted_chr = reaper_locus.chr
+ if reaper_locus.chr != "X" and reaper_locus.chr != "X/Y":
+ converted_chr = int(reaper_locus.chr)
+ json_data['chr'].append(converted_chr)
+ json_data['pos'].append(reaper_locus.Mb)
+ json_data['lod.hk'].append(qtl.lrs)
+ json_data['markernames'].append(reaper_locus.name)
+ #if self.additive:
+ # self.json_data['additive'].append(qtl.additive)
+ locus = {"name":reaper_locus.name, "chr":reaper_locus.chr, "cM":reaper_locus.cM, "Mb":reaper_locus.Mb}
+ qtl = {"lrs_value": qtl.lrs, "chr":converted_chr, "Mb":reaper_locus.Mb,
+ "cM":reaper_locus.cM, "name":reaper_locus.name, "additive":qtl.additive, "dominance":qtl.dominance}
+ qtl_results.append(qtl)
+ return qtl_results, json_data, perm_output, suggestive, significant, bootstrap_results
diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index 93bf717c..f3694f0b 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -1,193 +1,191 @@
-import rpy2.robjects as ro
-import numpy as np
-
-from base.webqtlConfig import TMPDIR
-from utility import webqtlUtil
-from utility.tools import locate, TEMPDIR
-
-def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan):
- geno_to_rqtl_function(dataset)
-
- ## Get pointers to some common R functions
- r_library = ro.r["library"] # Map the library function
- r_c = ro.r["c"] # Map the c function
- r_sum = ro.r["sum"] # Map the sum function
- plot = ro.r["plot"] # Map the plot function
- postscript = ro.r["postscript"] # Map the postscript function
- png = ro.r["png"] # Map the png function
- dev_off = ro.r["dev.off"] # Map the device off function
-
- print(r_library("qtl")) # Load R/qtl
-
- ## Get pointers to some R/qtl functions
- scanone = ro.r["scanone"] # Map the scanone function
- scantwo = ro.r["scantwo"] # Map the scantwo function
- calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function
- read_cross = ro.r["read.cross"] # Map the read.cross function
- write_cross = ro.r["write.cross"] # Map the write.cross function
- GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function
-
- crossname = dataset.group.name
- genofilelocation = locate(crossname + ".geno", "genotype")
- crossfilelocation = TMPDIR + crossname + ".cross"
-
- #print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation)
-
- cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available
-
- if manhattan_plot:
- cross_object = calc_genoprob(cross_object)
- else:
- cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")
-
- cross_object = add_phenotype(cross_object, sanitize_rqtl_phenotype(vals)) # Add the phenotype
-
- # for debug: write_cross(cross_object, "csvr", "test.csvr")
-
- # Scan for QTLs
- covar = create_covariates(control_marker, cross_object) # Create the additive covariate matrix
-
- if pair_scan:
- if do_control == "true": # If sum(covar) > 0 we have a covariate matrix
- print("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method, n_cluster = 16)
- else:
- print("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16)
-
- #print("Pair scan results:", result_data_frame)
-
- pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
- png(file=TEMPDIR+pair_scan_filename)
- plot(result_data_frame)
- dev_off()
-
- return process_pair_scan_results(result_data_frame)
- else:
- if do_control == "true":
- print("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method)
- else:
- print("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method)
-
- if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user)
- if do_control == "true":
- perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm = num_perm, model=model, method=method)
- else:
- perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method)
-
- perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface
- return perm_output, suggestive, significant, process_rqtl_results(result_data_frame)
- else:
- return process_rqtl_results(result_data_frame)
-
-def geno_to_rqtl_function(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
-
- ro.r("""
- trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
-
- getGenoCode <- function(header, name = 'unk'){
- mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
- return(trim(strsplit(header[mat],':')[[1]][2]))
- }
-
- GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
- header = readLines(genotypes, 40) # Assume a geno header is not longer than 40 lines
- toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers
-
- genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes
- type <- getGenoCode(header, 'type')
- genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
- cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
- if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one
- if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males
- outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype
- c('sex', '', '', sex), # Sex phenotype for the mice
- cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes
- write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file
- require(qtl)
- cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross
- if(type == 'riset') cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl
- return(cross)
- }
- """ % (dataset.group.name + ".geno"))
-
-def add_phenotype(cross, pheno_as_string):
- ro.globalenv["the_cross"] = cross
- ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')')
- return ro.r["the_cross"]
-
-def create_covariates(control_marker, cross):
- ro.globalenv["the_cross"] = cross
- ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix
- userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user
- covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS)
- #print("Marker names of selected covariates:", covariate_names)
- ro.r('covnames <- c(' + covariate_names + ')')
- ro.r('covInGeno <- which(covnames %in% colnames(genotypes))')
- ro.r('covnames <- covnames[covInGeno]')
- ro.r("cat('covnames (purged): ', covnames,'\n')")
- ro.r('covariates <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file
- #print("R/qtl matrix of covariates:", ro.r["covariates"])
- return ro.r["covariates"]
-
-def sanitize_rqtl_phenotype(vals):
- pheno_as_string = "c("
- for i, val in enumerate(vals):
- if val == "x":
- if i < (len(vals) - 1):
- pheno_as_string += "NA,"
- else:
- pheno_as_string += "NA"
- else:
- if i < (len(vals) - 1):
- pheno_as_string += str(val) + ","
- else:
- pheno_as_string += str(val)
- pheno_as_string += ")"
- return pheno_as_string
-
-def process_pair_scan_results(result):
- pair_scan_results = []
-
- result = result[1]
- output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
- #print("R/qtl scantwo output:", output)
-
- for i, line in enumerate(result.iter_row()):
- marker = {}
- marker['name'] = result.rownames[i]
- marker['chr1'] = output[i][0]
- marker['Mb'] = output[i][1]
- marker['chr2'] = int(output[i][2])
- pair_scan_results.append(marker)
-
- #print("pair_scan_results:", pair_scan_results)
-
- return pair_scan_results
-
-def process_rqtl_perm_results(num_perm, results):
- perm_vals = []
- for line in str(results).split("\n")[1:(num_perm+1)]:
- #print("R/qtl permutation line:", line.split())
- perm_vals.append(float(line.split()[1]))
-
- perm_output = perm_vals
- suggestive = np.percentile(np.array(perm_vals), 67)
- significant = np.percentile(np.array(perm_vals), 95)
- print("SIGNIFICANT:", significant)
-
- return perm_output, suggestive, significant
-
-def process_rqtl_results(result): # TODO: how to make this a one liner and not copy the stuff in a loop
- qtl_results = []
-
- output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
- #print("R/qtl scanone output:", output)
-
- for i, line in enumerate(result.iter_row()):
- marker = {}
- marker['name'] = result.rownames[i]
- marker['chr'] = output[i][0]
- marker['Mb'] = output[i][1]
- marker['lod_score'] = output[i][2]
- qtl_results.append(marker)
-
- return qtl_results \ No newline at end of file
+import rpy2.robjects as ro
+import numpy as np
+
+from base.webqtlConfig import TMPDIR
+from utility import webqtlUtil
+from utility.tools import locate, TEMPDIR
+
+def run_rqtl_geno(vals, dataset, method, model, permCheck, num_perm, do_control, control_marker, manhattan_plot, pair_scan):
+ geno_to_rqtl_function(dataset)
+
+ ## Get pointers to some common R functions
+ r_library = ro.r["library"] # Map the library function
+ r_c = ro.r["c"] # Map the c function
+ r_sum = ro.r["sum"] # Map the sum function
+ plot = ro.r["plot"] # Map the plot function
+ postscript = ro.r["postscript"] # Map the postscript function
+ png = ro.r["png"] # Map the png function
+ dev_off = ro.r["dev.off"] # Map the device off function
+
+ print(r_library("qtl")) # Load R/qtl
+
+ ## Get pointers to some R/qtl functions
+ scanone = ro.r["scanone"] # Map the scanone function
+ scantwo = ro.r["scantwo"] # Map the scantwo function
+ calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function
+ read_cross = ro.r["read.cross"] # Map the read.cross function
+ write_cross = ro.r["write.cross"] # Map the write.cross function
+ GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function
+
+ crossname = dataset.group.name
+ genofilelocation = locate(crossname + ".geno", "genotype")
+ crossfilelocation = TMPDIR + crossname + ".cross"
+
+ #print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation)
+
+ cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available
+
+ if manhattan_plot:
+ cross_object = calc_genoprob(cross_object)
+ else:
+ cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")
+
+ cross_object = add_phenotype(cross_object, sanitize_rqtl_phenotype(vals)) # Add the phenotype
+
+ # for debug: write_cross(cross_object, "csvr", "test.csvr")
+
+ # Scan for QTLs
+ covar = create_covariates(control_marker, cross_object) # Create the additive covariate matrix
+
+ if pair_scan:
+ if do_control == "true": # If sum(covar) > 0 we have a covariate matrix
+ print("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method, n_cluster = 16)
+ else:
+ print("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16)
+
+ #print("Pair scan results:", result_data_frame)
+
+ pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
+ png(file=TEMPDIR+pair_scan_filename)
+ plot(result_data_frame)
+ dev_off()
+
+ return process_pair_scan_results(result_data_frame)
+ else:
+ if do_control == "true":
+ print("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covar, model=model, method=method)
+ else:
+ print("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method)
+
+ if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user)
+ if do_control == "true":
+ perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covar, n_perm = num_perm, model=model, method=method)
+ else:
+ perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method)
+
+ perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface
+ return perm_output, suggestive, significant, process_rqtl_results(result_data_frame)
+ else:
+ return process_rqtl_results(result_data_frame)
+
+def geno_to_rqtl_function(dataset): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
+
+ ro.r("""
+ trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
+
+ getGenoCode <- function(header, name = 'unk'){
+ mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
+ return(trim(strsplit(header[mat],':')[[1]][2]))
+ }
+
+ GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
+ header = readLines(genotypes, 40) # Assume a geno header is not longer than 40 lines
+ toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers
+
+ genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes
+ type <- getGenoCode(header, 'type')
+ genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
+ cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
+ if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one
+ if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males
+ outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype
+ c('sex', '', '', sex), # Sex phenotype for the mice
+ cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)])) # Genotypes
+ write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file
+ require(qtl)
+ cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross
+ if(type == 'riset') cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl
+ return(cross)
+ }
+ """ % (dataset.group.name + ".geno"))
+
+def add_phenotype(cross, pheno_as_string):
+ ro.globalenv["the_cross"] = cross
+ ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), the_pheno = '+ pheno_as_string +')')
+ return ro.r["the_cross"]
+
+def create_covariates(control_marker, cross):
+ ro.globalenv["the_cross"] = cross
+ ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix
+ userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user
+ covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS)
+ #print("Marker names of selected covariates:", covariate_names)
+ ro.r('covnames <- c(' + covariate_names + ')')
+ ro.r('covInGeno <- which(covnames %in% colnames(genotypes))')
+ ro.r('covnames <- covnames[covInGeno]')
+ ro.r("cat('covnames (purged): ', covnames,'\n')")
+ ro.r('covariates <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file
+ #print("R/qtl matrix of covariates:", ro.r["covariates"])
+ return ro.r["covariates"]
+
+def sanitize_rqtl_phenotype(vals):
+ pheno_as_string = "c("
+ for i, val in enumerate(vals):
+ if val == "x":
+ if i < (len(vals) - 1):
+ pheno_as_string += "NA,"
+ else:
+ pheno_as_string += "NA"
+ else:
+ if i < (len(vals) - 1):
+ pheno_as_string += str(val) + ","
+ else:
+ pheno_as_string += str(val)
+ pheno_as_string += ")"
+ return pheno_as_string
+
+def process_pair_scan_results(result):
+ pair_scan_results = []
+
+ result = result[1]
+ output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+ #print("R/qtl scantwo output:", output)
+
+ for i, line in enumerate(result.iter_row()):
+ marker = {}
+ marker['name'] = result.rownames[i]
+ marker['chr1'] = output[i][0]
+ marker['Mb'] = output[i][1]
+ marker['chr2'] = int(output[i][2])
+ pair_scan_results.append(marker)
+
+ return pair_scan_results
+
+def process_rqtl_perm_results(num_perm, results):
+ perm_vals = []
+ for line in str(results).split("\n")[1:(num_perm+1)]:
+ #print("R/qtl permutation line:", line.split())
+ perm_vals.append(float(line.split()[1]))
+
+ perm_output = perm_vals
+ suggestive = np.percentile(np.array(perm_vals), 67)
+ significant = np.percentile(np.array(perm_vals), 95)
+
+ return perm_output, suggestive, significant
+
+def process_rqtl_results(result): # TODO: how to make this a one liner and not copy the stuff in a loop
+ qtl_results = []
+
+ output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+ #print("R/qtl scanone output:", output)
+
+ for i, line in enumerate(result.iter_row()):
+ marker = {}
+ marker['name'] = result.rownames[i]
+ marker['chr'] = output[i][0]
+ marker['Mb'] = output[i][1]
+ marker['lod_score'] = output[i][2]
+ qtl_results.append(marker)
+
+ return qtl_results
+
diff --git a/wqflask/wqflask/templates/base.html b/wqflask/wqflask/templates/base.html
index c3826a90..210c5708 100644
--- a/wqflask/wqflask/templates/base.html
+++ b/wqflask/wqflask/templates/base.html
@@ -95,29 +95,27 @@
</form>
</div>
- {% block content %}{% endblock %}
+ {% block content %}
+ {% endblock %}
- <!-- Footer
- ================================================== -->
<footer class="footer">
<div class="container">
-
- <p>
-
-GeneNetwork is a framework for web based genetics
- launched in 1994 as
+ <p>GeneNetwork is a framework for web based genetics
+ launched in 1994 as
<a href="http://www.ncbi.nlm.nih.gov/pubmed?term=8043953">
The Portable Dictionary of the Mouse Genome</a> (previously <a href="https://www.ncbi.nlm.nih.gov/pubmed/15043217">WebQTL</a>).
</p>
- <p>Operated by
+ <p>
+ Operated by
<a href="mailto:rwilliams@uthsc.edu">Rob Williams</a>,
<a href="mailto:lyan6@uthsc.edu">Lei Yan</a>,
- <a href="mailto:zachary.a.sloan@gmail.com">Zachary Sloan</a>, and
- <a href="mailto:acenteno@uthsc.edu">Arthur Centeno</a>.
+ <a href="mailto:zachary.a.sloan@gmail.com">Zachary Sloan</a>,
+ <a href="mailto:acenteno@uthsc.edu">Arthur Centeno</a> and
+ <a href="http://thebird.nl/">Pjotr Prins</a>.
</p>
<p>Published in
<a href="http://joss.theoj.org/papers/10.21105/joss.00025"><img src="https://camo.githubusercontent.com/846b750f582ae8f1d0b4f7e8fee78bed705c88ba/687474703a2f2f6a6f73732e7468656f6a2e6f72672f7061706572732f31302e32313130352f6a6f73732e30303032352f7374617475732e737667" alt="JOSS" data-canonical-src="http://joss.theoj.org/papers/10.21105/joss.00025/status.svg" style="max-width:100%;"></a>
- </p>
+ </p>
<br />
<p>GeneNetwork is supported by:</p>
<UL>
@@ -143,8 +141,12 @@ GeneNetwork is a framework for web based genetics
</li>
</UL>
<!--</p>-->
+ <p>
+ Development and source code on <a href="https://github.com/genenetwork/">github</a> with <a href="https://github.com/genenetwork/genenetwork2/issues">issue tracker</a> and <a href="https://github.com/genenetwork/genenetwork2/blob/master/README.md">documentation</a>. Join the <a href="http://listserv.uthsc.edu/mailman/listinfo/genenetwork-dev">mailing list</a> and find us on <a href="https://webchat.freenode.net/">IRC</a> (#genenetwork channel).
+ {% if version: %}
+ <p><small>GeneNetwork v{{ version }}</small></p>
+ {% endif %}
- Join the <a href="http://listserv.uthsc.edu/mailman/listinfo/genenetwork-dev">mailing list</a>
</div>
</footer>
diff --git a/wqflask/wqflask/templates/ctl_results.html b/wqflask/wqflask/templates/ctl_results.html
index a5cb1c08..00ccecb6 100644
--- a/wqflask/wqflask/templates/ctl_results.html
+++ b/wqflask/wqflask/templates/ctl_results.html
@@ -4,7 +4,7 @@
{% block content %} <!-- Start of body -->
<div class="container">
<h1>CTL Results</h1>
- {{(request.form['trait_list'].split(',')|length -1)}} phenotypes as input<br>
+ {{(request.form['trait_list'].split(',')|length)}} phenotypes as input<br>
<h3>Network Figure</h3>
<a href="/tmp/{{ results['imgurl1'] }}">
<img alt="Embedded Image" src="data:image/png;base64,
diff --git a/wqflask/wqflask/templates/ctl_setup.html b/wqflask/wqflask/templates/ctl_setup.html
index 51553322..a05379a8 100644
--- a/wqflask/wqflask/templates/ctl_setup.html
+++ b/wqflask/wqflask/templates/ctl_setup.html
@@ -12,7 +12,7 @@
</div>
{% else %}
<h1>CTL analysis parameters</h1>
- {{(request.form['trait_list'].split(',')|length -1)}} traits as input
+ {{(request.form['trait_list'].split(',')|length)}} traits as input
<form action="/ctl_results" method="post" class="form-horizontal">
<input type="hidden" name="trait_list" id="trait_list" value= "{{request.form['trait_list']}}">
diff --git a/wqflask/wqflask/templates/error.html b/wqflask/wqflask/templates/error.html
index 7ab2bf2f..c707a4fc 100644
--- a/wqflask/wqflask/templates/error.html
+++ b/wqflask/wqflask/templates/error.html
@@ -35,7 +35,7 @@
</p>
<pre>
- {{ stack[0] }}
+ GeneNetwork v{{ version }} {{ stack[0] }}
{{ message }} (error)
{{ stack[-3] }}
{{ stack[-2] }}
@@ -50,7 +50,7 @@
<a href="#Stack" class="btn btn-default" data-toggle="collapse">Toggle full stack trace</a>
<div id="Stack" class="collapse">
<pre>
- {% for line in stack %} {{ line }}
+ GeneNetwork v{{ version }} {% for line in stack %} {{ line }}
{% endfor %}
</pre>
</div>
diff --git a/wqflask/wqflask/templates/index_page_orig.html b/wqflask/wqflask/templates/index_page_orig.html
index 73d3e718..1694eae5 100755
--- a/wqflask/wqflask/templates/index_page_orig.html
+++ b/wqflask/wqflask/templates/index_page_orig.html
@@ -1,5 +1,10 @@
{% extends "base.html" %}
{% block title %}GeneNetwork{% endblock %}
+{% block css %}
+<style TYPE="text/css">
+ p.interact { display: none; }
+</style>
+{% endblock %}
{% block content %}
<!-- Start of body -->
@@ -166,8 +171,17 @@
</ul>
</section>
</div>
+
<div style="padding-left:120px" class="col-xs-4" style="width: 600px !important;">
- <!--
+ <section id="news-section">
+ <div class="page-header">
+ <h1>News</h1>
+ </div>
+ <div id="tweets"></div>
+ <div align="right">
+ <a href="https://twitter.com/GeneNetwork2">more news items...</a>
+ </div>
+ <!--
<section id="tour-info">
<div class="page-header">
<h1>Tour and more info</h1>
@@ -194,34 +208,37 @@
</section>
-->
+ </section>
<section id="websites">
<div class="page-header">
- <h1>Affiliates and mirrors</h1>
+ <h2>Links</h2>
</div>
- <h3>Websites affiliated with GeneNetwork</h3>
+ <h3>GeneNetwork v2:</h3>
+ <ul>
+ <li><a href="http://gn2.genenetwork.org/">Main website</a> at UTHSC</li>
+ <li><a href="http://test-gn2.genenetwork.org/">Testing website</a> at UTHSC</li>
+ </ul>
+ <h3>GeneNetwork v1:</h3>
<ul>
+ <li><a href="http://www.genenetwork.org/">Main website</a> at UTHSC</li>
+ <li><a href="http://genenetwork.helmholtz-hzi.de/">Website</a> at the HZI (Germany)</li>
+ <li><a href="http://ec2.genenetwork.org/">Amazon
+ Cloud (EC2)</a></li>
+ </ul>
+ <h3>Affiliates</h3>
+ <ul>
<li><a href="http://ucscbrowser.genenetwork.org/">Genome
browser</a> at UTHSC</li>
<li><a href="http://galaxy.genenetwork.org/">Galaxy</a> at
UTHSC</li>
- <li>GeneNetwork 1 at <a href="http://ec2.genenetwork.org/">Amazon
- Cloud (EC2)</a></li>
+ </ul>
- <li>GeneNetwork 1 Source Code at <a href="http://sourceforge.net/projects/genenetwork/">SourceForge</a></li>
- <li>GeneNetwork 2 Source Code at <a href="https://github.com/genenetwork/genenetwork2">GitHub</a></li>
- </ul>
- <h3>GN1 Mirror and development sites</h3>
- <ul>
- <li><a href="http://www.genenetwork.org/">Main GN1 site at UTHSC</a> (main site)</li>
- <li><a href="http://genenetwork.helmholtz-hzi.de/">Germany at the HZI</a></li>
- <li><a href="http://gn2.genenetwork.org/">Memphis at the U of M</a></li>
- </ul>
- </section>
+ </section>
<!--<section id="getting-started">
<div class="page-header">
@@ -282,4 +299,21 @@
}
</script>
+ <script type="text/javascript" src="/twitter/js/twitterFetcher_min.js"></script>
+
+ <script type="text/javascript">
+ var configProfile = {
+ "profile": {"screenName": 'GeneNetwork2'},
+ "domId": 'tweets',
+ "maxTweets": 5,
+ "enableLinks": true,
+ "showUser": false,
+ "showTime": true,
+ "showImages": false,
+ "lang": 'en'
+ };
+ twitterFetcher.fetch(configProfile);
+ </script>
+
+
{% endblock %}
diff --git a/wqflask/wqflask/user_manager.py b/wqflask/wqflask/user_manager.py
index 598af0a6..1e831896 100644
--- a/wqflask/wqflask/user_manager.py
+++ b/wqflask/wqflask/user_manager.py
@@ -158,7 +158,6 @@ def verify_cookie(cookie):
assert the_signature == actual_hmac_creation(the_uuid), "Uh-oh, someone tampering with the cookie?"
return the_uuid
-
def create_signed_cookie():
the_uuid = str(uuid.uuid4())
signature = actual_hmac_creation(the_uuid)
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 406f8930..822bab9f 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -51,7 +51,7 @@ from wqflask.wgcna import wgcna_analysis
from wqflask.ctl import ctl_analysis
from utility import temp_data
-from utility.tools import SQL_URI,TEMPDIR,USE_REDIS,USE_GN_SERVER,GN_SERVER_URL
+from utility.tools import SQL_URI,TEMPDIR,USE_REDIS,USE_GN_SERVER,GN_SERVER_URL,GN_VERSION
from base import webqtlFormData
from base.webqtlConfig import GENERATED_IMAGE_DIR
@@ -108,7 +108,7 @@ def handle_bad_request(e):
list = [fn for fn in os.listdir("./wqflask/static/gif/error") if fn.endswith(".gif") ]
animation = random.choice(list)
- resp = make_response(render_template("error.html",message=err_msg,stack=formatted_lines,error_image=animation))
+ resp = make_response(render_template("error.html",message=err_msg,stack=formatted_lines,error_image=animation,version=GN_VERSION))
# logger.error("Set cookie %s with %s" % (err_msg, animation))
resp.set_cookie(err_msg[:32],animation)
@@ -124,10 +124,10 @@ def index_page():
g.cookie_session.import_traits_to_user()
if USE_GN_SERVER:
# The menu is generated using GN_SERVER
- return render_template("index_page.html", gn_server_url = GN_SERVER_URL)
+ return render_template("index_page.html", gn_server_url = GN_SERVER_URL, version=GN_VERSION)
else:
# Old style static menu (OBSOLETE)
- return render_template("index_page_orig.html")
+ return render_template("index_page_orig.html", version=GN_VERSION)
@app.route("/tmp/<img_path>")
@@ -143,6 +143,10 @@ def tmp_page(img_path):
return render_template("show_image.html",
img_base64 = bytesarray )
+@app.route("/twitter/<path:filename>")
+def bd_files(filename):
+ bd_path = app.config['TWITTER_POST_FETCHER_JS_PATH']
+ return send_from_directory(bd_path, filename)
#@app.route("/data_sharing")
#def data_sharing_page():