wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125

# This is a converter for common LMM formats, so as to keep complexity
# outside the main routines. 

# Copyright (C) 2015  Pjotr Prins (pjotr.prins@thebird.nl)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from optparse import OptionParser
import sys
import os
import numpy as np
# from lmm import LMM, run_other
import input

usage = """
python convertlmm.py [--kinship kfile] 

  Convert files for runlmm.py processing. Writes to stdout.

  try --help for more information
"""

# if len(args) == 0:
#     print usage
#     sys.exit(1)

option_parser = OptionParser(usage=usage)
option_parser.add_option("--kinship", dest="kinship",
                  help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program")
option_parser.add_option("--plink", dest="plink",
                  help="Parse a phenotype file (PLINK style)")
# option_parser.add_option("--kinship",action="store_false", dest="kinship", default=True,
#                   help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program.")
option_parser.add_option("--prefix", dest="prefix",
                  help="Output prefix for output file(s)")
option_parser.add_option("-q", "--quiet",
                  action="store_false", dest="verbose", default=True,
                  help="don't print status messages to stdout")
option_parser.add_option("-v", "--verbose",
                  action="store_true", dest="verbose", default=False,
                  help="Print extra info")

(options, args) = option_parser.parse_args()

writer = None

def msg(s):
    sys.stderr.write("INFO: ")
    sys.stderr.write(s)
    sys.stderr.write("\n")
    
def wr(s):
    if writer:
        writer.write(s)
    else:
        sys.stdout.write(s)

def wrln(s):
    wr(s)
    wr("\n")
            
if options.kinship:
    is_header = True
    count = 0
    msg("Converting "+options.kinship)
    if options.prefix:
        writer = open(options.prefix+".kin","w")
    for line in open(options.kinship,'r'):
        count += 1
        if is_header:
            size = len(line.split())
            wrln("# Kinship format version 1.0")
            wrln("# Size="+str(size))
            for i in range(size):
                wr("\t"+str(i+1))
            wr("\n")
            is_header = False
        wr(str(count))
        wr("\t")
        wr("\t".join(line.split()))
        wr("\n")
    msg(str(count)+" lines written")

if options.plink:
    # Because plink does not track size we need to read the whole thing first
    msg("Converting "+options.plink)
    phenos = []
    count = 0
    count_pheno = None
    for line in open(options.plink,'r'):
        count += 1
        list = line.split()
        pcount = len(list)-2
        assert(pcount > 0)
        if count_pheno == None:
            count_pheno = pcount
        assert(count_pheno == pcount)
        row = [list[0]]+list[2:]
        phenos.append(row)

    if options.prefix:
        writer = open(options.prefix+".pheno","w")
    wrln("# Phenotype format version 1.0")
    wrln("# Individuals = "+str(count))
    wrln("# Phenotypes = "+str(count_pheno))
    for i in range(count_pheno):
        wr("\t"+str(i+1))
        wr("\n")
    for i in range(count):
        wr("\t".join(phenos[i]))
        wr("\n")
    msg(str(count)+" lines written")
            
msg("Converting done")