Write genotype file non-transposed

author: Pjotr Prins 2015-03-09 16:47:33 +0300
committer: Pjotr Prins 2015-03-09 16:47:33 +0300
commit: 749cf072da1849d926ab4d74e288ddf582e84c5a (patch)
tree: 2d79b136bb77309178508cce671bc7495217c105
parent: 02cb9710a83bc81abe198cb089dc5a70baf1b657 (diff)
download: genenetwork2-749cf072da1849d926ab4d74e288ddf582e84c5a.tar.gz
2 files changed, 16 insertions, 15 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index a3728bd2..f35cc47c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -148,17 +148,17 @@ if options.geno:
     wrln("# Genotype format version 1.0")
     wrln("# Individuals = "+str(num_inds))
     wrln("# Phenotypes = "+str(len(num_snps)))
+    for i in range(len(num_snps)):
+        wr("\t"+str(i+1))
+        wr("\n")
 
+    m = []
     def out(i,x):
         wr(str(i)+"\t")
-        lst = [str(v) for v in x]
-        wr("\t".join(lst))
+        wr("\t".join(x))
         wr("\n")
         
-    snps = plink.readbed(options.geno+'.bed',num_inds, out)
-    # for i in range(num_snps):
-    #     wr("\t"+str(i+1))
-    #     wr("\n")
+    snps = plink.readbed(options.geno+'.bed',num_inds, ('A','H','B','NA'), out)
     # for i in range(count):
     #     wr("\t".join(phenos[i]))
     #     wr("\n")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/plink.py b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
index 4ad2c5f7..7bd2df91 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/plink.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
@@ -66,25 +66,26 @@ def readbim(fn):
 # http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#fam
 # http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#bim
 
-def readbed(fn,inds,func=None):
+def readbed(fn,inds,encoding,func=None):
 
     # For every SNP block fetch the individual genotypes using values
     # 0.0 and 1.0 for homozygous and 0.5 for heterozygous alleles
     def fetchGenotypes(X):
-        D = { \
-              '00': 0.0, \
-              '10': 0.5, \
-              '11': 1.0, \
-              '01': float('nan') \
-           }
-  
+        # D = { \
+        #      '00': 0.0, \
+        #      '10': 0.5, \
+        #      '11': 1.0, \
+        #      '01': float('nan') \
+        #   }
+
+        Didx = { '00': 0, '10': 1, '11': 2, '01': 3 }
         G = []
         for x in X:
             if not len(x) == 10:
                 xx = x[2:]
                 x = '0b' + '0'*(8 - len(xx)) + xx
             a,b,c,d = (x[8:],x[6:8],x[4:6],x[2:4]) 
-            L = [D[y] for y in [a,b,c,d]]
+            L = [encoding[Didx[y]] for y in [a,b,c,d]]
             G += L
         G = G[:inds]
         # G = np.array(G)
author	Pjotr Prins	2015-03-09 16:47:33 +0300
committer	Pjotr Prins	2015-03-09 16:47:33 +0300
commit	749cf072da1849d926ab4d74e288ddf582e84c5a (patch)
tree	2d79b136bb77309178508cce671bc7495217c105
parent	02cb9710a83bc81abe198cb089dc5a70baf1b657 (diff)
download	genenetwork2-749cf072da1849d926ab4d74e288ddf582e84c5a.tar.gz