about summary refs log tree commit diff
path: root/wqflask
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask')
-rwxr-xr-xwqflask/runserver.py7
-rwxr-xr-xwqflask/wqflask/interval_mapping/interval_mapping.py22
-rw-r--r--wqflask/wqflask/marker_regression/gemma_mapping.py44
-rwxr-xr-xwqflask/wqflask/marker_regression/marker_regression.py62
-rwxr-xr-xwqflask/wqflask/model.py2
-rw-r--r--wqflask/wqflask/my_pylmm/README.md52
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/__init__.py2
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/benchmark.py44
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py10
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/genotype.py19
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/gn2.py110
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/gwas.py80
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/kinship.py83
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py341
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm2.py71
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/phenotype.py37
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py111
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/standalone.py110
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/temp_data.py25
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py48
-rwxr-xr-xwqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee13
-rwxr-xr-xwqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js7
-rw-r--r--wqflask/wqflask/templates/pair_scan_results.html22
-rwxr-xr-xwqflask/wqflask/templates/search_result_page.html2
-rwxr-xr-xwqflask/wqflask/templates/show_trait_mapping_tools.html12
25 files changed, 956 insertions, 380 deletions
diff --git a/wqflask/runserver.py b/wqflask/runserver.py
index 9d5686a9..5a76d1e2 100755
--- a/wqflask/runserver.py
+++ b/wqflask/runserver.py
@@ -18,9 +18,10 @@ from wqflask import app
 #_ch = logging.StreamHandler()
 #_log.addHandler(_ch)
 
+print app.config
+
 import logging
-#from themodule import TheHandlerYouWant
-file_handler = logging.FileHandler("/tmp/flask_gn_log_danny_unsecure")
+file_handler = logging.FileHandler(app.config['LOGFILE'])
 file_handler.setLevel(logging.DEBUG)
 app.logger.addHandler(file_handler)
 
@@ -28,7 +29,7 @@ import logging_tree
 logging_tree.printout()
 
 app.run(host='0.0.0.0',
-        port=5003,
+        port=app.config['SERVER_PORT'],
         use_debugger=False,
         threaded=True,
         use_reloader=True)
diff --git a/wqflask/wqflask/interval_mapping/interval_mapping.py b/wqflask/wqflask/interval_mapping/interval_mapping.py
index 5eb901f7..5511826a 100755
--- a/wqflask/wqflask/interval_mapping/interval_mapping.py
+++ b/wqflask/wqflask/interval_mapping/interval_mapping.py
@@ -57,13 +57,8 @@ class IntervalMapping(object):
         self.set_options(start_vars)
  
         self.json_data = {}
- 
-        #if self.method == "qtl_reaper":
         self.json_data['lodnames'] = ['lod.hk']
         self.gen_reaper_results(tempdata)
-        #else:
-        #    self.gen_pylmm_results(tempdata)
-        #self.gen_qtl_results(tempdata)
 
         #Get chromosome lengths for drawing the interval map plot
         chromosome_mb_lengths = {}
@@ -93,13 +88,7 @@ class IntervalMapping(object):
     def set_options(self, start_vars):
         """Sets various options (physical/genetic mapping, # permutations, which chromosome"""
         
-        #self.plot_scale = start_vars['scale']
-        #if self.plotScale == 'physic' and not fd.genotype.Mbmap:
-        #    self.plotScale = 'morgan'
-        #self.method = start_vars['mapping_method']
         self.num_permutations = int(start_vars['num_perm'])
-        #self.do_bootstrap = start_vars['do_bootstrap']
-        #self.selected_chr = start_vars['chromosome']
         if start_vars['manhattan_plot'] == "true":
             self.manhattan_plot = True
         else:
@@ -112,9 +101,6 @@ class IntervalMapping(object):
             self.control_locus = start_vars['control_locus']
         else:
             self.control_locus = None
-        #self.weighted_regression = start_vars['weighted']
-        #self.lrs_lod = start_vars['lrs_lod']
-
 
     def gen_qtl_results(self, tempdata):
         """Generates qtl results for plotting interval map"""
@@ -134,7 +120,7 @@ class IntervalMapping(object):
         #if self.weighted_regression:
         #    self.lrs_array = self.genotype.permutation(strains = trimmed_samples,
         #                                                         trait = trimmed_values, 
-        #                                                         variance = _vars,
+        #                                                         variance = variances,
         #                                                         nperm=self.num_permutations)
         #else:
         self.lrs_array = genotype.permutation(strains = trimmed_samples,
@@ -175,12 +161,6 @@ class IntervalMapping(object):
         self.json_data['markernames'] = []
         for qtl in reaper_results:
             reaper_locus = qtl.locus
-            #if reaper_locus.chr == "20":
-            #    print("changing to X")
-            #    self.json_data['chr'].append("X")
-            #else:
-            #    self.json_data['chr'].append(reaper_locus.chr)
-            ##self.json_data['chr'].append(reaper_locus.chr)
             self.json_data['pos'].append(reaper_locus.Mb)
             self.json_data['lod.hk'].append(qtl.lrs)
             self.json_data['markernames'].append(reaper_locus.name)
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
new file mode 100644
index 00000000..b1ab780c
--- /dev/null
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -0,0 +1,44 @@
+import os
+
+from base import webqtlConfig
+
+def run_gemma(this_dataset, samples, vals): 
+    """Generates p-values for each marker using GEMMA"""
+        
+    print("INSIDE GEMMA_MAPPING")
+
+    gen_pheno_txt_file(this_dataset, samples, vals)
+
+    os.chdir("{}gemma".format(webqtlConfig.HTMLPATH))
+
+    gemma_command = './gemma -bfile %s -k output_%s.cXX.txt -lmm 1 -o output/%s_output' % (this_dataset.group.name,
+                                                                                    this_dataset.group.name,
+                                                                                    this_dataset.group.name)
+    print("gemma_command:" + gemma_command)
+        
+    os.system(gemma_command)
+        
+    included_markers, p_values = parse_gemma_output(this_dataset)
+
+    return included_markers, p_values
+
+def gen_pheno_txt_file(this_dataset, samples, vals):
+    """Generates phenotype file for GEMMA"""
+                
+    with open("{}gemma/{}.fam".format(webqtlConfig.HTMLPATH, this_dataset.group.name), "w") as outfile:
+        for i, sample in enumerate(samples):
+            outfile.write(str(sample) + " " + str(sample) + " 0 0 0 " + str(vals[i]) + "\n")
+
+def parse_gemma_output(this_dataset):
+    included_markers = []
+    p_values = []
+    with open("{}gemma/output/{}_output.assoc.txt".format(webqtlConfig.HTMLPATH, this_dataset.group.name)) as output_file:
+        for line in output_file:
+            if line.startswith("chr"):
+                continue
+            else:
+                included_markers.append(line.split("\t")[1])
+                p_values.append(float(line.split("\t")[10]))
+
+    print("p_values: ", p_values)
+    return included_markers, p_values
\ No newline at end of file
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 7708356b..bed316d7 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -25,12 +25,17 @@ from redis import Redis
 Redis = Redis()
 
 from flask import Flask, g
+from wqflask import app
 
 from base.trait import GeneralTrait
 from base import data_set
 from base import species
 from base import webqtlConfig
 from utility import webqtlUtil
+from wqflask.marker_regression import qtl_reaper_mapping
+from wqflask.marker_regression import plink_mapping
+from wqflask.marker_regression import gemma_mapping
+from wqflask.marker_regression import rqtl_mapping
 from wqflask.my_pylmm.data import prep_data
 from wqflask.my_pylmm.pyLMM import lmm
 from wqflask.my_pylmm.pyLMM import input
@@ -40,6 +45,14 @@ from utility import temp_data
 
 from utility.benchmark import Bench
 
+import os
+if os.environ.get('PYLMM_PATH') is None:
+    PYLMM_PATH=app.config.get('PYLMM_PATH')
+    if PYLMM_PATH is None:
+        PYLMM_PATH=os.environ['HOME']+'/gene/wqflask/wqflask/my_pylmm/pyLMM'
+if not os.path.isfile(PYLMM_PATH+'/lmm.py'):
+    raise 'PYLMM_PATH unknown or faulty'
+PYLMM_COMMAND= 'python '+PYLMM_PATH+'/lmm.py'
 
 class MarkerRegression(object):
 
@@ -73,7 +86,10 @@ class MarkerRegression(object):
  
         self.dataset.group.get_markers()
         if self.mapping_method == "gemma":
-            qtl_results = self.run_gemma()
+            included_markers, p_values = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals)
+            self.dataset.group.get_specified_markers(markers = included_markers)
+            self.dataset.group.markers.add_pvalues(p_values)
+            qtl_results = self.dataset.group.markers.markers
         elif self.mapping_method == "rqtl_plink":
             qtl_results = self.run_rqtl_plink()
         elif self.mapping_method == "rqtl_geno":
@@ -88,9 +104,7 @@ class MarkerRegression(object):
 
             if start_vars['pair_scan'] == "true":
                 self.pair_scan = True
-            print("pair scan:", self.pair_scan)
 
-            print("DOING RQTL GENO")
             qtl_results = self.run_rqtl_geno()
             print("qtl_results:", qtl_results)
         elif self.mapping_method == "plink":
@@ -126,8 +140,9 @@ class MarkerRegression(object):
 
         #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
         self.qtl_results = []
-        for qtl in self.filtered_markers:
-            print("lod score is:", qtl['lod_score'])
+        for index,qtl in enumerate(self.filtered_markers):
+            if index<40:
+                print("lod score is:", qtl['lod_score'])
             if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                 print("changing to X")
                 self.json_data['chr'].append("X")
@@ -144,7 +159,7 @@ class MarkerRegression(object):
             self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length])
             chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
         
-        print("json_data:", self.json_data)
+        # print("json_data:", self.json_data)
         
 
         self.js_data = dict(
@@ -156,11 +171,11 @@ class MarkerRegression(object):
             chromosomes = chromosome_mb_lengths,
             qtl_results = self.filtered_markers,
         )
+        
 
     def run_gemma(self):
         """Generates p-values for each marker using GEMMA"""
         
-        #filename = webqtlUtil.genRandStr("{}_{}_".format(self.dataset.group.name, self.this_trait.name))
         self.gen_pheno_txt_file()
 
         os.chdir("/home/zas1024/gene/web/gemma")
@@ -272,7 +287,7 @@ class MarkerRegression(object):
         """)
     
     def run_rqtl_geno(self):
-        print("Calling R/qtl from python")
+        print("Calling R/qtl")
 
         self.geno_to_rqtl_function()
 
@@ -381,9 +396,23 @@ class MarkerRegression(object):
         return pheno_as_string
 
     def process_pair_scan_results(self, result):
-        results = []
+        pair_scan_results = []
+
+        result = result[1]
+        output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+        print("R/qtl scantwo output:", output)
+
+        for i, line in enumerate(result.iter_row()):
+            marker = {}
+            marker['name'] = result.rownames[i]
+            marker['chr1'] = output[i][0]
+            marker['Mb'] = output[i][1]
+            marker['chr2'] = int(output[i][2])
+            pair_scan_results.append(marker)
+
+        print("pair_scan_results:", pair_scan_results)
 
-        return results
+        return pair_scan_results
 
     def process_rqtl_results(self, result):        # TODO: how to make this a one liner and not copy the stuff in a loop
         qtl_results = []
@@ -655,8 +684,7 @@ class MarkerRegression(object):
                 Redis.set(key, json_params)
                 Redis.expire(key, 60*60)
     
-                command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
-                                                                                                                        "other")
+                command = PYLMM_COMMAND+' --key {} --species {}'.format(key,"other")
     
                 os.system(command)
     
@@ -713,8 +741,8 @@ class MarkerRegression(object):
             #            "refit": False,
             #            "temp_data": tempdata}
             
-            print("genotype_matrix:", str(genotype_matrix.tolist()))
-            print("pheno_vector:", str(pheno_vector.tolist()))
+            # print("genotype_matrix:", str(genotype_matrix.tolist()))
+            # print("pheno_vector:", str(pheno_vector.tolist()))
             
             params = dict(pheno_vector = pheno_vector.tolist(),
                         genotype_matrix = genotype_matrix.tolist(),
@@ -732,7 +760,7 @@ class MarkerRegression(object):
             Redis.expire(key, 60*60)
             print("before printing command")
 
-            command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
+            command = PYLMM_COMMAND + ' --key {} --species {}'.format(key,
                                                                                                                     "other")
             print("command is:", command)
             print("after printing command")
@@ -745,7 +773,7 @@ class MarkerRegression(object):
             json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
             results = json.loads(json_results[1])
             p_values = [float(result) for result in results['p_values']]
-            print("p_values:", p_values)
+            print("p_values:", p_values[:10])
             #p_values = self.trim_results(p_values)
             t_stats = results['t_stats']
             
@@ -806,7 +834,7 @@ class MarkerRegression(object):
 
         print("Before creating the command")
 
-        command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
+        command = PYLMM_COMMAND+' --key {} --species {}'.format(key,
                                                                                                                 "human")
         
         print("command is:", command)
diff --git a/wqflask/wqflask/model.py b/wqflask/wqflask/model.py
index fa8c1aab..042cb8df 100755
--- a/wqflask/wqflask/model.py
+++ b/wqflask/wqflask/model.py
@@ -194,4 +194,4 @@ def display_collapsible(number):
 def user_uuid():
     """Unique cookie for a user"""
     user_uuid = request.cookies.get('user_uuid')
-    
\ No newline at end of file
+    
diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md
index f6b0e72d..b844c845 100644
--- a/wqflask/wqflask/my_pylmm/README.md
+++ b/wqflask/wqflask/my_pylmm/README.md
@@ -1,21 +1,37 @@
-# RELEASE NOTES
-
-## 0.50-gn2-pre1 release
-
-This is the first test release of multi-core pylmm into GN2. Both
-kinship calculation and GWAS have been made multi-threaded by
-introducing the Python multiprocessing module. Note that only
-run_other has been updated to use the new routines (so human is still
-handled the old way). I have taken care that we can still run both
-old-style and new-style LMM (through passing the 'new_code'
-boolean). This could be an option in the web server for users to
-select and test for any unexpected differences (of which there should
-be none, naturally ;).
-
-The current version can handle missing phenotypes, but as they are
-removed there is no way for GN2 to know what SNPs the P-values belong
-to. A future version will pass a SNP index to allow for missing
-phenotypes.
+# Genenetwork2/pylmm RELEASE NOTES 
+
+## 0.51-gn2 (April 19, 2015)
+
+- Improved GN2 integration
+- Less matrix transposes
+- Able to run pylmm standalone without Redis again (still requires
+  the modules)
+
+## 0.50-gn2 (April 2nd, 2015)
+
+- Replaced the GN2 genotype normalization
+
+## 0.50-gn2-pre2 (March 18, 2015)
+
+- Added abstractions for progress meter and info/debug statements;
+  Redis perc_complete is now updated through a lambda
+
+## 0.50-gn2-pre1 (release, March 17, 2015)
+
+- This is the first test release of multi-core pylmm into GN2. Both
+  kinship calculation and GWAS have been made multi-threaded by
+  introducing the Python multiprocessing module. Note that only
+  run_other has been updated to use the new routines (so human is
+  still handled the old way). I have taken care that we can still run
+  both old-style and new-style LMM (through passing the 'new_code'
+  boolean). This could be an option in the web server for users to
+  select and test for any unexpected differences (of which there
+  should be none, naturally ;).
+
+- The current version can handle missing phenotypes, but as they are
+  removed there is no way for GN2 to know what SNPs the P-values
+  belong to. A future version will pass a SNP index to allow for
+  missing phenotypes.
 
 
   
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
index c40c3221..f33c4e74 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
@@ -1 +1 @@
-PYLMM_VERSION="0.50-gn2-pre1"
+PYLMM_VERSION="0.51-gn2"
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
new file mode 100644
index 00000000..6c6c9f88
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
@@ -0,0 +1,44 @@
+from __future__ import print_function, division, absolute_import
+
+import collections
+import inspect
+import time
+
+class Bench(object):
+    entries = collections.OrderedDict()
+    
+    def __init__(self, name=None):
+        self.name = name
+
+    def __enter__(self):
+        if self.name:
+            print("Starting benchmark: %s" % (self.name))
+        else:
+            print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2]))
+        self.start_time = time.time()
+
+    def __exit__(self, type, value, traceback):
+        if self.name:
+            name = self.name
+        else:
+            name = "That"
+
+        time_taken = time.time() - self.start_time
+        print("  %s took: %f seconds" % (name, (time_taken)))
+        
+        if self.name:
+            Bench.entries[self.name] = Bench.entries.get(self.name, 0) + time_taken
+            
+
+    @classmethod
+    def report(cls):
+        total_time = sum((time_taken for time_taken in cls.entries.itervalues()))
+        print("\nTiming report\n")
+        for name, time_taken in cls.entries.iteritems():
+            percent = int(round((time_taken/total_time) * 100))
+            print("[{}%] {}: {}".format(percent, name, time_taken))
+        print()
+        
+    def reset(cls):
+        """Reset the entries"""
+        cls.entries = collections.OrderedDict()
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 3b6b5d70..4312fed0 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -1,5 +1,5 @@
-# This is a converter for common LMM formats, so as to keep complexity
-# outside the main routines. 
+# This is a converter for common LMM formats, so as to keep file
+# reader complexity outside the main routines.
 
 # Copyright (C) 2015  Pjotr Prins (pjotr.prins@thebird.nl)
 #
@@ -31,6 +31,12 @@ python convertlmm.py [--plink] [--prefix out_basename] [--kinship kfile] [--phen
   Convert files for runlmm.py processing. Writes to stdout by default.
 
   try --help for more information
+
+Examples:
+
+  python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos > test.pheno
+
+  python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos --geno data/test_snps.132k.clean.noX > test.geno
 """
 
 # if len(args) == 0:
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
index 315fd824..49f32e3a 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
@@ -37,14 +37,15 @@ def normalize(ind_g):
     Run for every SNP list (for one individual) and return
     normalized SNP genotype values with missing data filled in
     """
-    g1 = np.copy(ind_g)          # avoid side effects
-    x = True - np.isnan(ind_g)   # Matrix of True/False
-    m = ind_g[x].mean()          # Global mean value
-    s = np.sqrt(ind_g[x].var())  # Global stddev
-    g1[np.isnan(ind_g)] = m      # Plug-in mean values for missing data
-    if s == 0:
-        g1 = g1 - m              # Subtract the mean
+    g = np.copy(ind_g)              # copy to avoid side effects
+    missing = np.isnan(g)
+    values = g[True - missing]
+    mean = values.mean()            # Global mean value
+    stddev = np.sqrt(values.var())  # Global stddev
+    g[missing] = mean               # Plug-in mean values for missing data
+    if stddev == 0:
+        g = g - mean                # Subtract the mean
     else:
-        g1 = (g1 - m) / s        # Normalize the deviation
-    return g1
+        g = (g - mean) / stddev     # Normalize the deviation
+    return g
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
new file mode 100644
index 00000000..821195c8
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -0,0 +1,110 @@
+# Standalone specific methods and callback handler
+#
+# Copyright (C) 2015  Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# Set the log level with
+#
+#   logging.basicConfig(level=logging.DEBUG)
+
+from __future__ import absolute_import, print_function, division
+
+import numpy as np
+import sys
+import logging
+
+# logger = logging.getLogger(__name__)
+logger = logging.getLogger('lmm2')
+logging.basicConfig(level=logging.DEBUG)
+np.set_printoptions(precision=3,suppress=True)
+
+progress_location = None
+progress_current  = None
+progress_prev_perc     = None
+
+def progress_default_func(location,count,total):
+    global progress_current
+    value = round(count*100.0/total)
+    progress_current = value
+
+progress_func = progress_default_func
+
+def progress_set_func(func):
+    global progress_func
+    progress_func = func
+
+def progress(location, count, total):
+    global progress_location
+    global progress_prev_perc
+
+    perc = round(count*100.0/total)
+    if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
+        progress_func(location, count, total)
+        logger.info("Progress: %s %d%%" % (location,perc))
+        progress_location = location
+        progress_prev_perc = perc
+
+def mprint(msg,data):
+    """
+    Array/matrix print function
+    """
+    m = np.array(data)
+    if m.ndim == 1:
+        print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:])
+    if m.ndim == 2:
+        print(msg,m.shape,"=\n[",
+              m[0][0:3]," ... ",m[0][-3:],"\n ",
+              m[1][0:3]," ... ",m[1][-3:],"\n  ...\n ",
+              m[-2][0:3]," ... ",m[-2][-3:],"\n ",
+              m[-1][0:3]," ... ",m[-1][-3:],"]")
+
+def fatal(msg):
+    logger.critical(msg)
+    raise Exception(msg)
+
+def callbacks():
+    return dict(
+        write = sys.stdout.write,
+        writeln = print,
+        debug = logger.debug,
+        info = logger.info,
+        warning = logger.warning,
+        error = logger.error,
+        critical = logger.critical,
+        fatal = fatal,
+        progress = progress,
+        mprint = mprint
+    )
+
+def uses(*funcs):
+    """
+    Some sugar
+    """
+    return [callbacks()[func] for func in funcs]
+    
+# ----- Minor test cases:
+
+if __name__ == '__main__':
+    # logging.basicConfig(level=logging.DEBUG)
+    logging.debug("Test %i" % (1))
+    d = callbacks()['debug']
+    d("TEST")
+    wrln = callbacks()['writeln']
+    wrln("Hello %i" % 34)
+    progress = callbacks()['progress']
+    progress("I am half way",50,100)
+    list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+            0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+            0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+            0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+            0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]
+    mprint("list",list)
+    matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+            [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+            [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+            [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+            [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+            [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
+    mprint("matrix",matrix)
+    ix,dx = uses("info","debug")
+    ix("ix")
+    dx("dx")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index b901c0e2..247a8729 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -20,7 +20,6 @@
 import pdb
 import time
 import sys
-# from utility import temp_data
 import lmm2
 
 import os
@@ -32,16 +31,25 @@ from lmm2 import LMM2
 import multiprocessing as mp # Multiprocessing is part of the Python stdlib
 import Queue 
 
+# ---- A trick to decide on the environment:
+try:
+    from wqflask.my_pylmm.pyLMM import chunks
+    from gn2 import uses
+except ImportError:
+    has_gn2=False
+    from standalone import uses
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
+
+
 def formatResult(id,beta,betaSD,ts,ps):
    return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
 
 def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
-   # print("COMPUTE SNP",j,snp_ids,"\n")
    result = []
    for snp_id in snp_ids:
       snp,id = snp_id
       x = snp.reshape((n,1))  # all the SNPs
-      # print "X=",x
       # if refit:
       #    L.fit(X=snp,REML=REML)
       ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True)
@@ -51,33 +59,28 @@ def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
       q = compute_snp.q
    q.put([j,result])
    return j
-      # PS.append(ps)
-      # TS.append(ts)
-      # return len(result)
-      # compute.q.put(result)
-      # return None
 
 def f_init(q):
    compute_snp.q = q
 
 def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
    """
-   Execute a GWAS. The G matrix should be n inds (cols) x m snps (rows)
+   GWAS. The G matrix should be n inds (cols) x m snps (rows)
    """
+   info("In gwas.gwas")
    matrix_initialize()
    cpu_num = mp.cpu_count()
    numThreads = None # for now use all available threads
    kfile2 = False
    reml = restricted_max_likelihood
 
-   sys.stderr.write(str(G.shape)+"\n")
+   mprint("G",G)
    n = G.shape[1] # inds
    inds = n
    m = G.shape[0] # snps
    snps = m
-   sys.stderr.write(str(m)+" SNPs\n")
-   # print "***** GWAS: G",G.shape,G
-   assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds)
+   info("%s SNPs",snps)
+   assert snps>=inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds)
 
    # CREATE LMM object for association
    # if not kfile2:  L = LMM(Y,K,Kva,Kve,X0,verbose=verbose)
@@ -85,19 +88,10 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
 
    lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose)
    if not refit:
-      if verbose: sys.stderr.write("Computing fit for null model\n")
+      info("Computing fit for null model")
       lmm2.fit()  # follow GN model in run_other
-      if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (lmm2.optH,lmm2.optSigma))
-      
-   # outFile = "test.out"
-   # out = open(outFile,'w')
-   out = sys.stderr
-
-   def outputResult(id,beta,betaSD,ts,ps):
-      out.write(formatResult(id,beta,betaSD,ts,ps))
-   def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
-
-   # printOutHead()
+      info("heritability=%0.3f, sigma=%0.3f" % (lmm2.optH,lmm2.optSigma))
+            
    res = []
 
    # Set up the pool
@@ -106,26 +100,24 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
    p = mp.Pool(numThreads, f_init, [q])
    collect = []
 
-   # Buffers for pvalues and t-stats
-   # PS = []
-   # TS = []
    count = 0
    job = 0
    jobs_running = 0
+   jobs_completed = 0
    for snp in G:
       snp_id = (snp,'SNPID')
       count += 1
       if count % 1000 == 0:
          job += 1
-         if verbose:
-            sys.stderr.write("Job %d At SNP %d\n" % (job,count))
+         debug("Job %d At SNP %d" % (job,count))
          if numThreads == 1:
-            print "Running on 1 THREAD"
+            debug("Running on 1 THREAD")
             compute_snp(job,n,collect,lmm2,reml,q)
             collect = []
             j,lst = q.get()
-            if verbose:
-               sys.stderr.write("Job "+str(j)+" finished\n")
+            debug("Job "+str(j)+" finished")
+            jobs_completed += 1
+            progress("GWAS2",jobs_completed,snps/1000)
             res.append((j,lst))
          else:
             p.apply_async(compute_snp,(job,n,collect,lmm2,reml))
@@ -134,8 +126,9 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
             while jobs_running > cpu_num:
                try:
                   j,lst = q.get_nowait()
-                  if verbose:
-                     sys.stderr.write("Job "+str(j)+" finished\n")
+                  debug("Job "+str(j)+" finished")
+                  jobs_completed += 1
+                  progress("GWAS2",jobs_completed,snps/1000)
                   res.append((j,lst))
                   jobs_running -= 1
                except Queue.Empty:
@@ -150,24 +143,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
 
    if numThreads==1 or count<1000 or len(collect)>0:
       job += 1
-      print "Collect final batch size %i job %i @%i: " % (len(collect), job, count)
+      debug("Collect final batch size %i job %i @%i: " % (len(collect), job, count))
       compute_snp(job,n,collect,lmm2,reml,q)
       collect = []
       j,lst = q.get()
       res.append((j,lst))
-   print "count=",count," running=",jobs_running," collect=",len(collect)
+   debug("count=%i running=%i collect=%i" % (count,jobs_running,len(collect)))
    for job in range(jobs_running):
       j,lst = q.get(True,15) # time out
-      if verbose:
-         sys.stderr.write("Job "+str(j)+" finished\n")
+      debug("Job "+str(j)+" finished")
+      jobs_completed += 1
+      progress("GWAS2",jobs_completed,snps/1000)
       res.append((j,lst))
 
-   print "Before sort",[res1[0] for res1 in res]
+   mprint("Before sort",[res1[0] for res1 in res])
    res = sorted(res,key=lambda x: x[0])
-   # if verbose:
-   #    print "res=",res[0][0:10]
-   print "After sort",[res1[0] for res1 in res]
-   print [len(res1[1]) for res1 in res]
+   mprint("After sort",[res1[0] for res1 in res])
+   info([len(res1[1]) for res1 in res])
    ts = [item[0] for j,res1 in res for item in res1]
    ps = [item[1] for j,res1 in res for item in res1]
    return ts,ps
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index 0c43587e..1c157fd8 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -28,17 +28,30 @@ import time
 
 from optmatrix import matrix_initialize, matrixMultT
 
+# ---- A trick to decide on the environment:
+try:
+    from wqflask.my_pylmm.pyLMM import chunks
+    from gn2 import uses, progress_set_func
+except ImportError:
+    has_gn2=False
+    import standalone as handlers
+    from standalone import uses, progress_set_func
+
+progress,debug,info,mprint = uses('progress','debug','info','mprint')
+
 def kinship_full(G):
    """
    Calculate the Kinship matrix using a full dot multiplication
    """
-   print G.shape
+   # mprint("kinship_full G",G)
    m = G.shape[0] # snps
    n = G.shape[1] # inds
-   sys.stderr.write(str(m)+" SNPs\n")
-   assert m>n, "n should be larger than m (snps>inds)"
-   m = np.dot(G.T,G)
+   info("%d SNPs",m)
+   assert m>n, "n should be larger than m (%d snps > %d inds)" % (m,n)
+   # m = np.dot(G.T,G)
+   m = matrixMultT(G.T)
    m = m/G.shape[0]
+   # mprint("kinship_full K",m)
    return m
 
 def compute_W(job,G,n,snps,compute_size):
@@ -74,46 +87,38 @@ def f_init(q):
 
 # Calculate the kinship matrix from G (SNPs as rows!), returns K
 #
-def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
-   numThreads = None
-   if numThreads:
-      numThreads = int(numThreads)
+def kinship(G,computeSize=1000,numThreads=None,useBLAS=False):
+
    matrix_initialize(useBLAS)
-   
-   sys.stderr.write(str(G.shape)+"\n")
+
+   mprint("G",G)
    n = G.shape[1] # inds
    inds = n
    m = G.shape[0] # snps
    snps = m
-   sys.stderr.write(str(m)+" SNPs\n")
-   assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds)
+   info("%i SNPs" % (m))
+   assert snps>=inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds)
 
    q = mp.Queue()
    p = mp.Pool(numThreads, f_init, [q])
    cpu_num = mp.cpu_count()
-   print "CPU cores:",cpu_num
-   print snps,computeSize
+   info("CPU cores: %i" % cpu_num)
    iterations = snps/computeSize+1
-   # if testing:
-   #    iterations = 8
-   # jobs = range(0,8) # range(0,iterations)
 
    results = []
-
    K = np.zeros((n,n))  # The Kinship matrix has dimension individuals x individuals
 
    completed = 0
    for job in range(iterations):
-      if verbose:
-         sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*computeSize)))
+      info("Processing job %d first %d SNPs" % (job, ((job+1)*computeSize)))
       W = compute_W(job,G,n,snps,computeSize)
       if numThreads == 1:
          # Single-core
          compute_matrixMult(job,W,q)
          j,x = q.get()
-         if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+         debug("Job "+str(j)+" finished")
+         progress("kinship",j,iterations)
          K_j = x
-         # print j,K_j[:,0]
          K = K + K_j
       else:
          # Multi-core
@@ -123,52 +128,38 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
             time.sleep(0.1)
             try: 
                j,x = q.get_nowait()
-               if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+               debug("Job "+str(j)+" finished")
                K_j = x
-               # print j,K_j[:,0]
                K = K + K_j
                completed += 1
+               progress("kinship",completed,iterations)
             except Queue.Empty:
                pass
          
    if numThreads == None or numThreads > 1:
-      # results contains the growing result set
       for job in range(len(results)-completed):
          j,x = q.get(True,15)
-         if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+         debug("Job "+str(j)+" finished")
          K_j = x
-         # print j,K_j[:,0]
          K = K + K_j
          completed += 1
+         progress("kinship",completed,iterations)
 
    K = K / float(snps)
-   
-   # outFile = 'runtest.kin'
-   # if verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile)
-   # np.savetxt(outFile,K)
-
-   # if saveKvaKve:
-   #    if verbose: sys.stderr.write("Obtaining Eigendecomposition\n")
-   #    Kva,Kve = linalg.eigh(K)
-   #    if verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile)
-   #    np.savetxt(outFile+".kva",Kva)
-   #    np.savetxt(outFile+".kve",Kve)
    return K      
 
-def kvakve(K, verbose=True):
+def kvakve(K):
    """
    Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned
    of small values < 1e-6 (notably smaller than zero)
    """
-   if verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
-   
+   info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) )
    Kva,Kve = linalg.eigh(K)
-   if verbose:
-      print("Kva is: ", Kva.shape, Kva)
-      print("Kve is: ", Kve.shape, Kve)
+   mprint("Kva",Kva)
+   mprint("Kve",Kve)
 
-   if sum(Kva < 1e-6):
-      if verbose: sys.stderr.write("Cleaning %d eigen values (Kva<0)\n" % (sum(Kva < 0)))
+   if sum(Kva < 0):
+      info("Cleaning %d eigen values (Kva<0)" % (sum(Kva < 0)))
       Kva[Kva < 1e-6] = 1e-6
    return Kva,Kve
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 53689071..2a0c7fdc 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -19,49 +19,59 @@ from __future__ import absolute_import, print_function, division
 
 import sys
 import time
-import argparse
 import uuid
 
 import numpy as np
 from scipy import linalg
 from scipy import optimize
 from scipy import stats
-import pdb
+# import pdb
 
-import simplejson as json
-
-import gzip
-import zlib
+# import gzip
+# import zlib
 import datetime
-import cPickle as pickle
-import simplejson as json
-
+# import cPickle as pickle
 from pprint import pformat as pf
 
-from redis import Redis
-Redis = Redis()
-
-import sys
-sys.path.append("/home/zas1024/gene/wqflask/")
-
-has_gn2=True
-
-from utility.benchmark import Bench
-from utility import temp_data
-
-sys.path.append("/home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/")
+# Add local dir to PYTHONPATH
+import os
+cwd = os.path.dirname(__file__)
+if sys.path[0] != cwd:
+    sys.path.insert(1,cwd)
 
+# pylmm imports
 from kinship import kinship, kinship_full, kvakve
 import genotype
 import phenotype
 import gwas
+from benchmark import Bench
 
+# The following imports are for exchanging data with the webserver
+import simplejson as json
+from redis import Redis
+Redis = Redis()
+import temp_data
+
+has_gn2=None
+
+# sys.stderr.write("INFO: pylmm system path is "+":".join(sys.path)+"\n")
+sys.stderr.write("INFO: pylmm file is "+__file__+"\n")
+
+# ---- A trick to decide on the environment:
 try:
-    from wqflask.my_pylmm.pyLMM import chunks
+    sys.stderr.write("INFO: lmm try loading module\n")
+    import utility.formatting # this is never used, just to check the environment
+    sys.stderr.write("INFO: This is a genenetwork2 environment\n")
+    from gn2 import uses, progress_set_func
+    has_gn2=True
 except ImportError:
-    print("WARNING: Standalone version missing the Genenetwork2 environment\n")
+    # Failed to load gn2
     has_gn2=False
-    pass
+    import standalone as handlers
+    from standalone import uses, progress_set_func
+    sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
 
 #np.seterr('raise')
 
@@ -76,8 +86,7 @@ def run_human(pheno_vector,
             covariate_matrix,
             plink_input_file,
             kinship_matrix,
-            refit=False,
-            tempdata=None):
+            refit=False):
 
     v = np.isnan(pheno_vector)
     keep = True - v
@@ -169,10 +178,7 @@ def run_human(pheno_vector,
             #if count > 1000:
             #    break
             count += 1
-
-            percent_complete = (float(count) / total_snps) * 100
-            #print("percent_complete: ", percent_complete)
-            tempdata.store("percent_complete", percent_complete)
+            progress("human",count,total_snps)
 
             #with Bench("actual association"):
             ps, ts = human_association(snp,
@@ -260,23 +266,19 @@ def human_association(snp,
 def run_other_old(pheno_vector,
         genotype_matrix,
         restricted_max_likelihood=True,
-        refit=False,
-        tempdata=None      # <---- can not be None
-        ):
+        refit=False):
     
     """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
     
     restricted_max_likelihood -- whether to use restricted max likelihood; True or False
     refit -- whether to refit the variance component for each marker
-    temp_data -- TempData object that stores the progress for each major step of the
-    calculations ("calculate_kinship" and "GWAS" take the majority of time) 
     
     """
     
     print("Running the original LMM engine in run_other (old)")
     print("REML=",restricted_max_likelihood," REFIT=",refit)
     with Bench("Calculate Kinship"):
-        kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata)
+        kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix)
     
     print("kinship_matrix: ", pf(kinship_matrix))
     print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -292,27 +294,22 @@ def run_other_old(pheno_vector,
 
     with Bench("Doing GWAS"):
         t_stats, p_values = GWAS(pheno_vector,
-                                      genotype_matrix,
+                                      genotype_matrix.T,
                                       kinship_matrix,
                                       restricted_max_likelihood=True,
-                                      refit=False,
-                                      temp_data=tempdata)
+                                      refit=False)
     Bench().report()
     return p_values, t_stats
 
-def run_other_new(pheno_vector,
-        genotype_matrix,
+def run_other_new(n,m,pheno_vector,
+        geno,
         restricted_max_likelihood=True,
-        refit=False,
-        tempdata=None      # <---- can not be None
-        ):
+        refit=False):
     
     """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
     
     restricted_max_likelihood -- whether to use restricted max likelihood; True or False
     refit -- whether to refit the variance component for each marker
-    temp_data -- TempData object that stores the progress for each major step of the
-    calculations ("calculate_kinship" and "GWAS" take the majority of time) 
     
     """
     
@@ -320,8 +317,7 @@ def run_other_new(pheno_vector,
     print("REML=",restricted_max_likelihood," REFIT=",refit)
 
     # Adjust phenotypes
-    Y,G,keep = phenotype.remove_missing(pheno_vector,genotype_matrix,verbose=True)
-    print("Removed missing phenotypes",Y.shape)
+    n,Y,keep = phenotype.remove_missing_new(n,pheno_vector)
 
     # if options.maf_normalization:
     #     G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
@@ -329,8 +325,9 @@ def run_other_new(pheno_vector,
     # if not options.skip_genotype_normalization:
     # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
 
+    geno = geno[:,keep]
     with Bench("Calculate Kinship"):
-        K,G = calculate_kinship(G, tempdata)
+        K,G = calculate_kinship_new(geno)
     
     print("kinship_matrix: ", pf(K))
     print("kinship_matrix.shape: ", pf(K.shape))
@@ -345,7 +342,7 @@ def run_other_new(pheno_vector,
 
     with Bench("Doing GWAS"):
         t_stats, p_values = gwas.gwas(Y,
-                                      G.T,
+                                      G,
                                       K,
                                       restricted_max_likelihood=True,
                                       refit=False,verbose=True)
@@ -385,18 +382,30 @@ def matrixMult(A,B):
 
     return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
 
-
-def calculate_kinship_new(genotype_matrix, temp_data=None):
+def calculate_kinship_new(genotype_matrix):
+    """ 
+    Call the new kinship calculation where genotype_matrix contains
+    inds (columns) by snps (rows).
+    """
+    assert type(genotype_matrix) is np.ndarray
+    info("call genotype.normalize")
+    G = np.apply_along_axis( genotype.normalize, axis=1, arr=genotype_matrix)
+    mprint("G",genotype_matrix)
+    info("call calculate_kinship_new")
+    return kinship(G),G # G gets transposed, we'll turn this into an iterator (FIXME)
+
+def calculate_kinship_iter(geno):
     """ 
     Call the new kinship calculation where genotype_matrix contains
     inds (columns) by snps (rows).
     """
-    print("call genotype.normalize")
+    assert type(genotype_matrix) is iter
+    info("call genotype.normalize")
     G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix)
-    print("call calculate_kinship_new")
-    return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME)
+    info("call calculate_kinship_new")
+    return kinship(G)
 
-def calculate_kinship_old(genotype_matrix, temp_data=None):
+def calculate_kinship_old(genotype_matrix):
     """
     genotype_matrix is an n x m matrix encoding SNP minor alleles.
     
@@ -404,13 +413,15 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
     normalizes the resulting vectors and returns the RRM matrix.
     
     """
-    print("call calculate_kinship_old")
+    info("call calculate_kinship_old")
+    fatal("THE FUNCTION calculate_kinship_old IS OBSOLETE, use calculate_kinship_new instead - see Genotype Normalization Problem on Pjotr's blog")
     n = genotype_matrix.shape[0]
     m = genotype_matrix.shape[1]
-    print("genotype 2D matrix n (inds) is:", n)
-    print("genotype 2D matrix m (snps) is:", m)
+    info("genotype 2D matrix n (inds) is: %d" % (n))
+    info("genotype 2D matrix m (snps) is: %d" % (m))
     assert m>n, "n should be larger than m (snps>inds)"
     keep = []
+    mprint("G (before old normalize)",genotype_matrix)
     for counter in range(m):
         #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
         #Checks if any values in column are not numbers
@@ -429,17 +440,13 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
             continue
         keep.append(counter)
         genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr)
-        
-        percent_complete = int(round((counter/m)*45))
-        if temp_data != None:
-            temp_data.store("percent_complete", percent_complete)
+        progress('kinship_old normalize genotype',counter,m)
         
     genotype_matrix = genotype_matrix[:,keep]
-    print("After kinship (old) genotype_matrix: ", pf(genotype_matrix))
+    mprint("G (after old normalize)",genotype_matrix.T)
     kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
     return kinship_matrix,genotype_matrix
-
-calculate_kinship = calculate_kinship_new  # alias
+    # return kinship_full(genotype_matrix.T),genotype_matrix
 
 def GWAS(pheno_vector,
          genotype_matrix,
@@ -465,9 +472,9 @@ def GWAS(pheno_vector,
     refit - refit the variance component for each SNP
     
     """
-    if kinship_eigen_vals == None:
+    if kinship_eigen_vals is None:
         kinship_eigen_vals = []
-    if kinship_eigen_vectors == None:
+    if kinship_eigen_vectors is None:
         kinship_eigen_vectors = []
     
     n = genotype_matrix.shape[0]
@@ -537,9 +544,8 @@ def GWAS(pheno_vector,
                 lmm_ob.fit(X=x)
             ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood)
             
-        percent_complete = 45 + int(round((counter/m)*55))
-        temp_data.store("percent_complete", percent_complete)
-
+        progress("gwas_old",counter,m)
+        
         p_values.append(ps)
         t_statistics.append(ts)
 
@@ -572,7 +578,7 @@ class LMM:
        When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
        """
 
-       if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1)
+       if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1)
        self.verbose = verbose
  
        #x = Y != -9
@@ -665,7 +671,7 @@ class LMM:
            REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True.
         """
  
-        if X == None:
+        if X is None:
             X = self.X0t
         elif stack: 
             self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
@@ -804,48 +810,96 @@ class LMM:
        pl.ylabel("Probability of data")
        pl.title(title)
 
-
-def gn2_redis(key,species,new_code=True):
+def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,new_code=True):
     """
-    Invoke pylmm using Redis as a container. new_code runs the new
-    version
+    Invoke pylmm using genotype as a matrix or as a (SNP) iterator.
     """
-    json_params = Redis.get(key)
-    
-    params = json.loads(json_params)
-    
-    tempdata = temp_data.TempData(params['temp_uuid'])
-
-    
-    print('pheno', np.array(params['pheno_vector']))
+    info("run_gwas")
+    print('pheno', y)
     
     if species == "human" :
-        print('kinship', np.array(params['kinship_matrix']))
-        ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
-                  covariate_matrix = np.array(params['covariate_matrix']),
-                  plink_input_file = params['input_file_name'],
-                  kinship_matrix = np.array(params['kinship_matrix']),
-                  refit = params['refit'],
-                  tempdata = tempdata)
+        print('kinship', k )
+        ps, ts = run_human(pheno_vector = y,
+                           covariate_matrix = cov,
+                           plink_input_file = inputfn,
+                           kinship_matrix = k,
+                           refit = refit)
     else:
-        geno = np.array(params['genotype_matrix'])
         print('geno', geno.shape, geno)
 
         if new_code:
-            ps, ts = run_other_new(pheno_vector = np.array(params['pheno_vector']),
-                               genotype_matrix = geno,
-                               restricted_max_likelihood = params['restricted_max_likelihood'],
-                               refit = params['refit'],
-                               tempdata = tempdata)
+            ps, ts = run_other_new(n,m,pheno_vector = y,
+                                   geno = geno,
+                                   restricted_max_likelihood = reml,
+                                   refit = refit)
         else:
-            ps, ts = run_other_old(pheno_vector = np.array(params['pheno_vector']),
+            ps, ts = run_other_old(pheno_vector = y,
                                genotype_matrix = geno,
-                               restricted_max_likelihood = params['restricted_max_likelihood'],
-                               refit = params['refit'],
-                               tempdata = tempdata)
+                               restricted_max_likelihood = reml,
+                               refit = refit)
+    return ps,ts
+
+def gwas_with_redis(key,species,new_code=True):
+    """
+    Invoke pylmm using Redis as a container. new_code runs the new
+    version. All the Redis code goes here!
+    """
+    json_params = Redis.get(key)
+    
+    params = json.loads(json_params)
+    
+    tempdata = temp_data.TempData(params['temp_uuid'])
+    def update_tempdata(loc,i,total):
+        """
+        This is the single method that updates Redis for percentage complete!
+        """
+        tempdata.store("percent_complete",round(i*100.0/total))
+        debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total)))
+    progress_set_func(update_tempdata)
+
+    def narray(t):
+        info("Type is "+t)
+        v = params.get(t)
+        if v is not None:
+            # Note input values can be array of string or float
+            v1 = [x if x != 'NA' else 'nan' for x in v]
+            v = np.array(v1).astype(np.float)
+        return v
+
+    def marray(t):
+        info("Type is "+t)
+        v = params.get(t)
+        if v is not None:
+            m = []
+            for r in v:
+              # Note input values can be array of string or float
+              r1 = [x if x != 'NA' else 'nan' for x in r]
+              m.append(np.array(r1).astype(np.float))
+            return np.array(m)
+        return np.array(v)
+
+    def marrayT(t):
+        m = marray(t)
+        if m is not None:
+            return m.T
+        return m
+    
+    # We are transposing before we enter run_gwas - this should happen on the webserver
+    # side (or when reading data from file)
+    k = marray('kinship_matrix')
+    g = marrayT('genotype_matrix')
+    mprint("geno",g)
+    y = narray('pheno_vector')
+    n = len(y)
+    m = params.get('num_genotypes')
+    if m is None:
+      m = g.shape[0]
+    info("m=%d,n=%d" % (m,n))
+    ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params.get('input_file_name'),new_code)
         
     results_key = "pylmm:results:" + params['temp_uuid']
 
+    # fatal(results_key)
     json_results = json.dumps(dict(p_values = ps,
                                    t_stats = ts))
     
@@ -854,28 +908,56 @@ def gn2_redis(key,species,new_code=True):
     Redis.expire(results_key, 60*60)
     return ps, ts
 
-# This is the main function used by Genenetwork2 (with environment)
-def gn2_main():
-    parser = argparse.ArgumentParser(description='Run pyLMM')
-    parser.add_argument('-k', '--key')
-    parser.add_argument('-s', '--species')
-    
-    opts = parser.parse_args()
-    
-    key = opts.key
-    species = opts.species
+def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
+    """
+    This function emulates current GN2 behaviour by pre-loading Redis (note the input
+    genotype is transposed to emulate GN2 (FIXME!)
+    """
+    info("Loading Redis from parsed data")
+    if kinship == None:
+        k = None
+    else:
+        k = kinship.tolist()
+    params = dict(pheno_vector = pheno.tolist(),
+                  genotype_matrix = geno.T.tolist(),
+                  num_genotypes = geno.shape[0],
+                  kinship_matrix = k,
+                  covariate_matrix = None,
+                  input_file_name = None,
+                  restricted_max_likelihood = True,
+                  refit = False,
+                  temp_uuid = "testrun_temp_uuid",
+                        
+                  # meta data
+                  timestamp = datetime.datetime.now().isoformat())
+            
+    json_params = json.dumps(params)
+    Redis.set(key, json_params)
+    Redis.expire(key, 60*60)
 
-    gn2_redis(key,species)
+    return gwas_with_redis(key,species,new_code)
 
-def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
-    print("Loading Redis from parsed data")
+def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator):
+    """
+    This function emulates GN2 behaviour by pre-loading Redis with
+    a SNP iterator, for this it sets a key for every genotype (SNP)
+    """
+    print("Loading Redis using a SNP iterator")
+    for i,genotypes in enumerate(geno_iterator):
+        gkey = key+'_geno_'+str(i)
+        Redis.set(gkey, genotypes)
+        Redis.expire(gkey, 60*60)
+    
     if kinship == None:
         k = None
     else:
         k = kinship.tolist()
     params = dict(pheno_vector = pheno.tolist(),
-                  genotype_matrix = geno.tolist(),
-                  kinship_matrix= k,
+                  genotype_matrix = "iterator",
+                  num_genotypes = i,
+                  kinship_matrix = k,
+                  covariate_matrix = None,
+                  input_file_name = None,
                   restricted_max_likelihood = True,
                   refit = False,
                   temp_uuid = "testrun_temp_uuid",
@@ -887,14 +969,27 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
     json_params = json.dumps(params)
     Redis.set(key, json_params)
     Redis.expire(key, 60*60)
+    return gwas_with_redis(key,species)
 
-    return gn2_redis(key,species,new_code)
+# This is the main function used by Genenetwork2 (with environment)
+#
+# Note that this calling route will become OBSOLETE (we should use runlmm.py
+# instead)
+def gn2_main():
+    import argparse
+    parser = argparse.ArgumentParser(description='Run pyLMM')
+    parser.add_argument('-k', '--key')
+    parser.add_argument('-s', '--species')
     
+    opts = parser.parse_args()
+    
+    key = opts.key
+    species = opts.species
+
+    gwas_with_redis(key,species)
+
+
 if __name__ == '__main__':
     print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
-    if has_gn2:
-        gn2_main()
-    else:
-        print("Run from runlmm.py instead")
-
+    gn2_main()
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index d4b3ac82..d871d8d2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -24,6 +24,25 @@ from scipy import optimize
 from optmatrix import matrixMult
 import kinship
 
+sys.stderr.write("INFO: pylmm (lmm2) system path is "+":".join(sys.path)+"\n")
+sys.stderr.write("INFO: pylmm (lmm2) file is "+__file__+"\n")
+
+# ---- A trick to decide on the environment:
+try:
+    sys.stderr.write("INFO: lmm2 try loading module\n")
+    import utility.formatting # this is never used, just to check the environment
+    sys.stderr.write("INFO: This is a genenetwork2 environment (lmm2)\n")
+    from gn2 import uses, progress_set_func
+except ImportError:
+    # Failed to load gn2
+    has_gn2=False
+    import standalone as handlers
+    from standalone import uses, progress_set_func
+    sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n")
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
+
+
 def calculateKinship(W,center=False):
       """
 	 W is an n x m matrix encoding SNP minor alleles.
@@ -75,7 +94,7 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
       print("genotype matrix n is:", n)
       print("genotype matrix m is:", m)
 
-      if X0 == None: 
+      if X0 is None: 
          X0 = np.ones((n,1))
       
       # Remove missing values in Y and adjust associated parameters
@@ -139,31 +158,35 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
 
 class LMM2:
 
-   """
-	 This is a simple version of EMMA/fastLMM.  
-	 The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K)
-	 and to optimize this model by finding the maximum-likelihood estimates for the model parameters.
-	 There are three model parameters: heritability (h), covariate coefficients (beta) and the total
-	 phenotypic variance (sigma).
-	 Heritability as defined here is the proportion of the total variance (sigma) that is attributed to 
-	 the kinship matrix.
-
-	 For simplicity, we assume that everything being input is a numpy array.
-	 If this is not the case, the module may throw an error as conversion from list to numpy array
-	 is not done consistently.
+   """This is a simple version of EMMA/fastLMM.
+
+   The main purpose of this module is to take a phenotype vector (Y),
+   a set of covariates (X) and a kinship matrix (K) and to optimize
+   this model by finding the maximum-likelihood estimates for the
+   model parameters.  There are three model parameters: heritability
+   (h), covariate coefficients (beta) and the total phenotypic
+   variance (sigma).  Heritability as defined here is the proportion
+   of the total variance (sigma) that is attributed to the kinship
+   matrix.
+
+   For simplicity, we assume that everything being input is a numpy
+   array.  If this is not the case, the module may throw an error as
+   conversion from list to numpy array is not done consistently.
 
    """
    def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False):
 
-      """
-      The constructor takes a phenotype vector or array Y of size n.
-      It takes a kinship matrix K of size n x n.  Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached.
-      If they are not provided, the constructor will calculate them.
-      X0 is an optional covariate matrix of size n x q, where there are q covariates.
-      When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
+      """The constructor takes a phenotype vector or array Y of size n. It
+      takes a kinship matrix K of size n x n.  Kva and Kve can be
+      computed as Kva,Kve = linalg.eigh(K) and cached.  If they are
+      not provided, the constructor will calculate them.  X0 is an
+      optional covariate matrix of size n x q, where there are q
+      covariates.  When this parameter is not provided, the
+      constructor will set X0 to an n x 1 matrix of all ones to
+      represent a mean effect.
       """
 
-      if X0 == None: 
+      if X0 is None: 
 	 X0 = np.ones(len(Y)).reshape(len(Y),1)
       self.verbose = verbose
 
@@ -250,7 +273,7 @@ class LMM2:
 	 REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True.
       """
 
-      if X == None: X = self.X0t
+      if X is None: X = self.X0t
       elif stack: 
 	 self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
 	 X = self.X0t_stack
@@ -306,7 +329,7 @@ class LMM2:
 	 Given this optimum, the function computes the LL and associated ML solutions.
       """
       
-      if X == None: X = self.X0t
+      if X is None: X = self.X0t
       else: 
 	 #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)])
 	 self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
@@ -330,7 +353,7 @@ class LMM2:
    def association(self,X,h=None,stack=True,REML=True,returnBeta=False):
       """
 	Calculates association statitics for the SNPs encoded in the vector X of size n.
-	If h == None, the optimal h stored in optH is used.
+	If h is None, the optimal h stored in optH is used.
 
       """
       if False:
@@ -348,7 +371,7 @@ class LMM2:
          self.X0t_stack[:,(self.q)] = m
 	 X = self.X0t_stack
 	 
-      if h == None: h = self.optH
+      if h is None: h = self.optH
 
       L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML)
       q  = len(beta)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
index 682ba371..7b652515 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
@@ -19,22 +19,47 @@
 import sys
 import numpy as np
 
-def remove_missing(y,g,verbose=False):
+# ---- A trick to decide on the environment:
+try:
+    from wqflask.my_pylmm.pyLMM import chunks
+    from gn2 import uses, progress_set_func
+except ImportError:
+    has_gn2=False
+    import standalone as handlers
+    from standalone import uses, progress_set_func
+
+progress,debug,info,mprint = uses('progress','debug','info','mprint')
+
+def remove_missing(n,y,g):
     """
     Remove missing data from matrices, make sure the genotype data has
     individuals as rows
     """
-    assert(y!=None)
-    assert(y.shape[0] == g.shape[0])
+    assert(y is not None)
+    assert y.shape[0] == g.shape[0],"y (n) %d, g (n,m) %s" % (y.shape[0],g.shape)
 
     y1 = y
     g1 = g
     v = np.isnan(y)
     keep = True - v
     if v.sum():
-        if verbose: 
-            sys.stderr.write("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum()))
+        info("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum()))
         y1 = y[keep]
         g1 = g[keep,:]
-    return y1,g1,keep
+        n = y1.shape[0]
+    return n,y1,g1,keep
+
+def remove_missing_new(n,y):
+    """
+    Remove missing data. Returns new n,y,keep
+    """
+    assert(y is not None)
+    y1 = y
+    v = np.isnan(y)
+    keep = True - v
+    if v.sum():
+        info("runlmm.py: Cleaning the phenotype vector by removing %d individuals" % (v.sum()))
+        y1 = y[keep]
+        n = y1.shape[0]
+    return n,y1,keep
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 324c4f2c..6b241cd6 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,10 +21,21 @@ from optparse import OptionParser
 import sys
 import tsvreader
 import numpy as np
-from lmm import gn2_load_redis, calculate_kinship_old
+
+# Add local dir to PYTHONPATH
+import os
+cwd = os.path.dirname(__file__)
+if sys.path[0] != cwd:
+    sys.path.insert(1,cwd)
+
+# pylmm modules
+from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new, run_gwas
 from kinship import kinship, kinship_full
 import genotype
 import phenotype
+from standalone import uses
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
 
 usage = """
 python runlmm.py [options] command
@@ -98,44 +109,64 @@ if options.pheno:
     y = tsvreader.pheno(options.pheno)
     print y.shape
 
-if options.geno:
+if options.geno and cmd != 'iterator':
     g = tsvreader.geno(options.geno)
     print g.shape
 
-if cmd == 'redis_new':
-    # The main difference between redis_new and redis is that missing
-    # phenotypes are handled by the first
-    Y = y
-    G = g
-    print "Original G",G.shape, "\n", G
-
-    gt = G.T
-    G = None
-    ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True)
+def check_results(ps,ts):
     print np.array(ps)
     print len(ps),sum(ps)
-    # Test results
     p1 = round(ps[0],4)
     p2 = round(ps[-1],4)
-    sys.stderr.write(options.geno+"\n")
     if options.geno == 'data/small.geno':
-        assert p1==0.0708, "p1=%f" % p1
-        assert p2==0.1417, "p2=%f" % p2
+        info("Validating results for "+options.geno)
+        assert p1==0.7387, "p1=%f" % p1
+        assert p2==0.7387, "p2=%f" % p2
     if options.geno == 'data/small_na.geno':
-        assert p1==0.0897, "p1=%f" % p1
-        assert p2==0.0405, "p2=%f" % p2
+        info("Validating results for "+options.geno)
+        assert p1==0.062, "p1=%f" % p1
+        assert p2==0.062, "p2=%f" % p2
     if options.geno == 'data/test8000.geno':
-        # assert p1==0.8984, "p1=%f" % p1
-        # assert p2==0.9621, "p2=%f" % p2
+        info("Validating results for "+options.geno)
         assert round(sum(ps)) == 4070
         assert len(ps) == 8000
+    info("Run completed")
+
+if y is not None:
+    n = y.shape[0]
+
+if cmd == 'run':
+    if options.remove_missing_phenotypes:
+        raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+    n = len(y)
+    m = g.shape[1]
+    ps, ts = run_gwas('other',n,m,k,y,g)  # <--- pass in geno by SNP
+    check_results(ps,ts)
+elif cmd == 'iterator':
+    if options.remove_missing_phenotypes:
+        raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+    geno_iterator =  tsvreader.geno_iter(options.geno)
+    ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,geno_iterator)
+    check_results(ps,ts)
+elif cmd == 'redis_new':
+    # The main difference between redis_new and redis is that missing
+    # phenotypes are handled by the first
+    if options.remove_missing_phenotypes:
+        raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+    Y = y
+    G = g
+    print "Original G",G.shape, "\n", G
+    # gt = G.T
+    # G = None
+    ps, ts = gn2_load_redis('testrun','other',k,Y,G,new_code=True)
+    check_results(ps,ts)
 elif cmd == 'redis':
     # Emulating the redis setup of GN2
     G = g
     print "Original G",G.shape, "\n", G
-    if y != None and options.remove_missing_phenotypes:
+    if y is not None and options.remove_missing_phenotypes:
         gnt = np.array(g).T
-        Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose)
+        n,Y,g,keep = phenotype.remove_missing(n,y,gnt)
         G = g.T
         print "Removed missing phenotypes",G.shape, "\n", G
     else:
@@ -148,30 +179,16 @@ elif cmd == 'redis':
     g = None
     gnt = None
 
-    gt = G.T
-    G = None
-    ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False)
-    print np.array(ps)
-    print len(ps),sum(ps)
-    # Test results 4070.02346579
-    p1 = round(ps[0],4)
-    p2 = round(ps[-1],4)
-    sys.stderr.write(options.geno+"\n")
-    if options.geno == 'data/small.geno':
-        assert p1==0.0708, "p1=%f" % p1
-        assert p2==0.1417, "p2=%f" % p2
-    if options.geno == 'data/small_na.geno':
-        assert p1==0.0897, "p1=%f" % p1
-        assert p2==0.0405, "p2=%f" % p2
-    if options.geno == 'data/test8000.geno':
-        assert round(sum(ps)) == 4070
-        assert len(ps) == 8000
+    # gt = G.T
+    # G = None
+    ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False)
+    check_results(ps,ts)
 elif cmd == 'kinship':
     G = g
     print "Original G",G.shape, "\n", G
     if y != None and options.remove_missing_phenotypes:
         gnt = np.array(g).T
-        Y,g = phenotype.remove_missing(y,g.T,options.verbose)
+        n,Y,g,keep = phenotype.remove_missing(n,y,g.T)
         G = g.T
         print "Removed missing phenotypes",G.shape, "\n", G
     if options.maf_normalization:
@@ -187,20 +204,20 @@ elif cmd == 'kinship':
         print "Genotype",G.shape, "\n", G
         print "first Kinship method",K.shape,"\n",K
         k1 = round(K[0][0],4)
-        K2,G = calculate_kinship_old(np.copy(G).T,temp_data=None)
+        K2,G = calculate_kinship_new(np.copy(G))
         print "Genotype",G.shape, "\n", G
         print "GN2 Kinship method",K2.shape,"\n",K2
         k2 = round(K2[0][0],4)
     
     print "Genotype",G.shape, "\n", G
-    K3 = kinship(G.T)
+    K3 = kinship(G)
     print "third Kinship method",K3.shape,"\n",K3
     sys.stderr.write(options.geno+"\n")
     k3 = round(K3[0][0],4)
     if options.geno == 'data/small.geno':
-        assert k1==0.8, "k1=%f" % k1
-        assert k2==0.7939, "k2=%f" % k2
-        assert k3==0.7939, "k3=%f" % k3
+        assert k1==0.8333, "k1=%f" % k1
+        assert k2==0.9375, "k2=%f" % k2
+        assert k3==0.9375, "k3=%f" % k3
     if options.geno == 'data/small_na.geno':
         assert k1==0.8333, "k1=%f" % k1
         assert k2==0.7172, "k2=%f" % k2
@@ -209,4 +226,4 @@ elif cmd == 'kinship':
         assert k3==1.4352, "k3=%f" % k3
 
 else:
-    print "Doing nothing"
+    fatal("Doing nothing")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
new file mode 100644
index 00000000..40b2021d
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -0,0 +1,110 @@
+# Standalone specific methods and callback handler
+#
+# Copyright (C) 2015  Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# Set the log level with
+#
+#   logging.basicConfig(level=logging.DEBUG)
+
+from __future__ import absolute_import, print_function, division
+
+import numpy as np
+import sys
+import logging
+
+# logger = logging.getLogger(__name__)
+logger = logging.getLogger('lmm2')
+logging.basicConfig(level=logging.DEBUG)
+np.set_printoptions(precision=3,suppress=True)
+
+progress_location = None 
+progress_current  = None
+progress_prev_perc     = None
+
+def progress_default_func(location,count,total):
+    global progress_current
+    value = round(count*100.0/total)
+    progress_current = value
+    
+progress_func = progress_default_func
+
+def progress_set_func(func):
+    global progress_func
+    progress_func = func
+    
+def progress(location, count, total):
+    global progress_location
+    global progress_prev_perc
+    
+    perc = round(count*100.0/total)
+    if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
+        progress_func(location, count, total)
+        logger.info("Progress: %s %d%%" % (location,perc))
+        progress_location = location
+        progress_prev_perc = perc
+
+def mprint(msg,data):
+    """
+    Array/matrix print function
+    """
+    m = np.array(data)
+    if m.ndim == 1:
+        print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:])
+    if m.ndim == 2:
+        print(msg,m.shape,"=\n[",
+              m[0][0:3]," ... ",m[0][-3:],"\n ",
+              m[1][0:3]," ... ",m[1][-3:],"\n  ...\n ",
+              m[-2][0:3]," ... ",m[-2][-3:],"\n ",
+              m[-1][0:3]," ... ",m[-1][-3:],"]")
+
+def fatal(msg):
+    logger.critical(msg)
+    raise Exception(msg)
+
+def callbacks():
+    return dict(
+        write = sys.stdout.write,
+        writeln = print,
+        debug = logger.debug,
+        info = logger.info,
+        warning = logger.warning,
+        error = logger.error,
+        critical = logger.critical,
+        fatal = fatal,
+        progress = progress,
+        mprint = mprint
+    )
+
+def uses(*funcs):
+    """
+    Some sugar
+    """
+    return [callbacks()[func] for func in funcs]
+    
+# ----- Minor test cases:
+
+if __name__ == '__main__':
+    # logging.basicConfig(level=logging.DEBUG)
+    logging.debug("Test %i" % (1))
+    d = callbacks()['debug']
+    d("TEST")
+    wrln = callbacks()['writeln']
+    wrln("Hello %i" % 34)
+    progress = callbacks()['progress']
+    progress("I am half way",50,100)
+    list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+            0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+            0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+            0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+            0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]
+    mprint("list",list)
+    matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+            [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+            [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+            [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+            [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+            [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
+    mprint("matrix",matrix)
+    ix,dx = uses("info","debug")
+    ix("ix")
+    dx("dx")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py
new file mode 100644
index 00000000..004d45c6
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py
@@ -0,0 +1,25 @@
+from __future__ import print_function, division, absolute_import
+from redis import Redis
+
+import simplejson as json
+
+class TempData(object):
+    
+    def __init__(self, temp_uuid):
+        self.temp_uuid = temp_uuid
+        self.redis = Redis()
+        self.key = "tempdata:{}".format(self.temp_uuid)
+        
+    def store(self, field, value):
+        self.redis.hset(self.key, field, value)
+        self.redis.expire(self.key, 60*15)  # Expire in 15 minutes
+        
+    def get_all(self):
+        return self.redis.hgetall(self.key)
+
+
+if __name__ == "__main__":
+    redis = Redis()
+    for key in redis.keys():
+        for field in redis.hkeys(key):
+            print("{}.{}={}".format(key, field, redis.hget(key, field)))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
index b4027fa3..66b34ee2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
@@ -56,7 +56,8 @@ def geno(fn):
 
     print fn
     with open(fn,'r') as tsvin:
-        assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+        line = tsvin.readline().strip()
+        assert line == "# Genotype format version 1.0", line
         tsvin.readline()
         tsvin.readline()
         tsvin.readline()
@@ -74,3 +75,48 @@ def geno(fn):
     G = np.array(G1)
     return G
 
+def geno(fn):
+    G1 = []
+    for id,values in geno_iter(fn):
+        G1.append(values) # <--- slow
+    G = np.array(G1)
+    return G
+
+def geno_callback(fn,func):
+    hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+    pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+    print fn
+    with open(fn,'r') as tsvin:
+        assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsv = csv.reader(tsvin, delimiter='\t')
+        for row in tsv:
+            id = row[0]
+            gs = list(row[1])
+            gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+            func(id,gs2) 
+
+def geno_iter(fn):
+    """
+    Yield a tuple of snpid and values
+    """
+    hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+    pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+    print fn
+    with open(fn,'r') as tsvin:
+        assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsv = csv.reader(tsvin, delimiter='\t')
+        for row in tsv:
+            id = row[0]
+            gs = list(row[1])
+            gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+            yield (id,gs2) 
diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
index a87d3537..881ea74d 100755
--- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
+++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
@@ -79,8 +79,8 @@ open_mapping_results = (data) ->
     $.colorbox(
         html: data
         href: "#mapping_results_holder"
-        height: "80%"
-        width: "80%"
+        height: "90%"
+        width: "90%"
     )
 
 showalert = (message,alerttype) ->
@@ -154,8 +154,8 @@ $("#plink_compute").click(() =>
         $("#static_progress_bar_container").modal()
         url = "/marker_regression"
         $('input[name=method]').val("plink")
-        $('input[name=mapping_display_all]').val($('input[name=display_all_plink]').val())
-        $('input[name=suggestive]').val($('input[name=suggestive_plink]').val())
+        #$('input[name=mapping_display_all]').val($('input[name=display_all_plink]').val())
+        #$('input[name=suggestive]').val($('input[name=suggestive_plink]').val())
         $('input[name=maf]').val($('input[name=maf_plink]').val())
         form_data = $('#trait_data_form').serialize()
         console.log("form_data is:", form_data)
@@ -164,11 +164,12 @@ $("#plink_compute").click(() =>
 )
 
 $("#gemma_compute").click(() =>
+        console.log("RUNNING GEMMA")
         $("#static_progress_bar_container").modal()
         url = "/marker_regression"
         $('input[name=method]').val("gemma")
-        $('input[name=mapping_display_all]').val($('input[name=display_all_gemma]').val())
-        $('input[name=suggestive]').val($('input[name=suggestive_gemma]').val())
+        #$('input[name=mapping_display_all]').val($('input[name=display_all_gemma]').val())
+        #$('input[name=suggestive]').val($('input[name=suggestive_gemma]').val())
         $('input[name=maf]').val($('input[name=maf_gemma]').val())
         form_data = $('#trait_data_form').serialize()
         console.log("form_data is:", form_data)
diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js
index c8988cdc..1779df4b 100755
--- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js
+++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js
@@ -99,8 +99,8 @@ open_mapping_results = function(data) {
   return $.colorbox({
     html: data,
     href: "#mapping_results_holder",
-    height: "80%",
-    width: "80%"
+    height: "90%",
+    width: "90%"
   });
 };
 
@@ -187,11 +187,10 @@ $("#plink_compute").click((function(_this) {
 $("#gemma_compute").click((function(_this) {
   return function() {
     var form_data, url;
+    console.log("RUNNING GEMMA");
     $("#static_progress_bar_container").modal();
     url = "/marker_regression";
     $('input[name=method]').val("gemma");
-    $('input[name=mapping_display_all]').val($('input[name=display_all_gemma]').val());
-    $('input[name=suggestive]').val($('input[name=suggestive_gemma]').val());
     $('input[name=maf]').val($('input[name=maf_gemma]').val());
     form_data = $('#trait_data_form').serialize();
     console.log("form_data is:", form_data);
diff --git a/wqflask/wqflask/templates/pair_scan_results.html b/wqflask/wqflask/templates/pair_scan_results.html
index 869dabed..f46d7cbf 100644
--- a/wqflask/wqflask/templates/pair_scan_results.html
+++ b/wqflask/wqflask/templates/pair_scan_results.html
@@ -33,6 +33,28 @@
             <h2>
                 Results
             </h2>
+            <table cellpadding="0" cellspacing="0" border="0" id="pair_scan_results" class="table table-hover table-striped table-bordered">
+                <thead>
+                    <tr>
+                        <td>Index</td>
+                        <td>Locus</td>
+                        <td>Chr 1</td>
+                        <td>Mb</td>
+                        <td>Chr 2</td>
+                   </tr>
+                </thead>
+                <tbody>
+                    {% for marker in pair_scan_results %}
+                        <tr>
+                            <td>{{loop.index}}</td>
+                            <td>{{marker.name}}</td>
+                            <td>{{marker.chr1}}</td>
+                            <td>{{marker.Mb}}</td>
+                            <td>{{marker.chr2}}</td>
+                        </tr>
+                    {% endfor %}
+                </tbody>
+            </table>
         </div>
     </div>
 
diff --git a/wqflask/wqflask/templates/search_result_page.html b/wqflask/wqflask/templates/search_result_page.html
index 731f6fbd..c7c2a62f 100755
--- a/wqflask/wqflask/templates/search_result_page.html
+++ b/wqflask/wqflask/templates/search_result_page.html
@@ -42,7 +42,7 @@
             <button class="btn btn-default" id="deselect_all"><span class="glyphicon glyphicon-remove"></span> Deselect All</button>
             <button class="btn btn-default" id="invert"><span class="glyphicon glyphicon-resize-vertical"></span> Invert</button>
             <button class="btn btn-default" id="add"><span class="glyphicon glyphicon-plus-sign"></span> Add</button>
-            <button class="btn btn-primary pull-right"><span class="glyphicon glyphicon-download"></span> Download Table</button>
+            <button class="btn btn-primary"><span class="glyphicon glyphicon-download"></span> Download Table</button>
             <br />
             <br />
             <table class="table table-hover table-striped" id='trait_table'>
diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html
index 426a5c5d..27504e51 100755
--- a/wqflask/wqflask/templates/show_trait_mapping_tools.html
+++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html
@@ -217,16 +217,16 @@
                     <div style="padding: 20px" class="form-horizontal">
                         <div class="mapping_method_fields form-group">
                             <label for="maf_plink" class="col-xs-2 control-label">Minor allele threshold</label>
-                            <div style="margin-left: 20px;" class="col-xs-1 controls">
+                            <div style="margin-left: 20px;" class="col-xs-2 controls">
                                 <input name="maf_plink" value="0.01" type="text" class="form-control">
                             </div>
                         </div>
                     </div>
                     
                     <div class="form-group">
-                        <label for="gemma_compute" class="col-xs-1 control-label"></label>
+                        <label for="plink_compute" class="col-xs-1 control-label"></label>
                         <div style="margin-left:20px;" class="col-xs-4 controls">
-                            <button id="gemma_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression">
+                            <button id="plink_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression">
                                 Compute
                             </button>
                         </div>
@@ -237,16 +237,16 @@
                     <div style="padding: 20px" class="form-horizontal">
                         <div class="mapping_method_fields form-group">
                             <label for="maf_gemma" class="col-xs-2 control-label">Minor allele threshold</label>
-                            <div style="margin-left: 20px;" class="col-xs-1 controls">
+                            <div style="margin-left: 20px;" class="col-xs-2 controls">
                                 <input name="maf_gemma" value="0.01" type="text" class="form-control">
                             </div>
                         </div>
                     </div>
                     
                     <div class="form-group">
-                        <label for="plink_compute" class="col-xs-1 control-label"></label>
+                        <label for="gemma_compute" class="col-xs-1 control-label"></label>
                         <div style="margin-left:20px;" class="col-xs-4 controls">
-                            <button id="plink_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression">
+                            <button id="gemma_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression">
                                 Compute
                             </button>
                         </div>