diff options
author | DannyArends | 2016-03-23 23:07:16 +0100 |
---|---|---|
committer | Pjotr Prins | 2016-04-20 10:17:42 +0000 |
commit | 3d505d997511cd8f7b9f14510059cb2983edc6d4 (patch) | |
tree | 38682d4eed213a0a264d036623b59037e78a9cf6 /wqflask/utility/genofile_parser.py | |
parent | f5ce8d6abb8b6a48b5ef6af6444f0289327fd95b (diff) | |
download | genenetwork2-3d505d997511cd8f7b9f14510059cb2983edc6d4.tar.gz |
Parsing the names of the individuals, and coding H as -999
Diffstat (limited to 'wqflask/utility/genofile_parser.py')
-rw-r--r-- | wqflask/utility/genofile_parser.py | 18 |
1 file changed, 11 insertions, 7 deletions
```diff
diff --git a/wqflask/utility/genofile_parser.py b/wqflask/utility/genofile_parser.py
index 9dd7b08b..67b84dc9 100644
--- a/wqflask/utility/genofile_parser.py
+++ b/wqflask/utility/genofile_parser.py
@@ -8,6 +8,7 @@ import glob
 import traceback
 import gzip
 
+
 import simplejson as json
 
 from pprint import pformat as pf
@@ -34,12 +35,12 @@ class ConvertGenoFile(object):
         self.latest_row_value = None
         self.latest_col_value = None
         self.input_fh = open(input_file)
-        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
+        print("!!!!!!!!!!!!!!!!PARSER!!!!!!!!!!!!!!!!!!")
         self.haplotype_notation = {
-            '@mat': "3",
-            '@pat': "1",
-            '@het': "2",
-            '@unk': "NA"
+            '@mat': "1",
+            '@pat': "2",
+            '@het': "-999",
+            '@unk': "-999"
             }
 
         self.configurations = {}
@@ -56,6 +57,8 @@ class ConvertGenoFile(object):
                     self.mb_exists = True
                 if 'cM' in row.split():
                     self.cm_exists = True
+                skip = 2 + self.cm_exists + self.mb_exists
+                self.individuals = row.split()[skip:]
                 continue
             if row.startswith('@'):
                 key, _separater, value = row.partition(':')
@@ -88,9 +91,10 @@ class ConvertGenoFile(object):
             else:
                 genotypes = row_items[2:]
             for item_count, genotype in enumerate(genotypes):
-                if genotype.upper() in self.configurations:
-                    this_marker.genotypes.append(self.configurations[genotype.upper()])
+                if genotype.upper().strip() in self.configurations:
+                    this_marker.genotypes.append(self.configurations[genotype.upper().strip()])
                 else:
+                    print("WARNING:", genotype.upper())
                     this_marker.genotypes.append("NA")
 
             self.markers.append(this_marker.__dict__)
```