about summary refs log tree commit diff
diff options
context:
space:
mode:
author Pjotr Prins 2015-03-10 11:17:31 +0300
committer Pjotr Prins 2015-03-10 11:17:31 +0300
commit 5528b84927e8e27ad4c13621272bb4bee4a9d694 (patch)
tree 1f3537f78bd8bcf7622995f97fbb34d5a1951de2
parent f7668a8b9fe552b9d908e2c68367e660d3a81482 (diff)
download genenetwork2-5528b84927e8e27ad4c13621272bb4bee4a9d694.tar.gz
Turn HAB encoding into pylmm genotyping
-rw-r--r-- wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py 3
-rw-r--r-- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py 15
2 files changed, 14 insertions, 4 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 8a1f03ad..89c09b1e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -147,7 +147,8 @@ if options.geno:
         writer = open(options.prefix+".geno","w")
     wrln("# Genotype format version 1.0")
     wrln("# Individuals = "+str(num_inds))
-    wrln("# Phenotypes = "+str(len(num_snps)))
+    wrln("# SNPs = "+str(len(num_snps)))
+    wrln("# Encoding = HAB")
     for i in range(num_inds):
         wr("\t"+str(i+1))
     wr("\n")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 4398926f..ce8e32be 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -28,7 +28,8 @@ import csv
 usage = """
 python runlmm.py [options] command
 
-  runlmm.py processing multiplexer reads standard input types and calls the routines
+  runlmm.py processing multiplexer reads standardised input formats
+  and calls the different routines
 
   Current commands are:
 
@@ -88,6 +89,9 @@ if options.pheno:
 
 if options.geno:
     G1 = []
+    hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+    pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
     print options.geno
     with open(options.geno,'r') as tsvin:
         assert(tsvin.readline().strip() == "# Genotype format version 1.0")
@@ -97,9 +101,14 @@ if options.geno:
         tsvin.readline()
         tsv = csv.reader(tsvin, delimiter='\t')
         for row in tsv:
-            print(row)
+            # print(row)
+            id = row[0]
+            gs = list(row[1])
+            # print id,gs
+            gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+            # print id,gs2
             # ns = np.genfromtxt(row[1:])
-            G1.append(ns) # <--- slow
+            G1.append(gs2) # <--- slow
     G = np.array(G1)
 
 print G